11. flox: Advanced Tools for GroupBy and Resample

11. flox: Advanced Tools for GroupBy and Resample#

Flox is a groupby package designed for xarray and dask. In fact, the built-in groupby function in xarray is based on Flox. Using Flox allows more flexibility for grouping and aggregation, especially when working with large datasets or custom grouping logic. In this chapter, we will mainly introduce how to use xarray in flox.

The syntax of flox.xarray.xarray_reduce is as follow:

flox.xarray.xarray_reduce(obj, *by, func, expected_groups=None, isbin=False, sort=True, dim=None, fill_value=None, dtype=None, method=None, engine=None, keep_attrs=True, skipna=None, min_count=None, reindex=None, **finalize_kwargs)

We will use several examples to show how to set these options.

Example 1: Calculate the OLR climatology.

import xarray as xr 
import numpy as np
from flox.xarray import xarray_reduce 

olr = xr.open_dataset('./data/olr.nc').olr
olr
<xarray.DataArray 'olr' (time: 8760, lat: 90, lon: 360)> Size: 1GB
[283824000 values with dtype=float32]
Coordinates:
  * time     (time) datetime64[ns] 70kB 1998-01-01 1998-01-02 ... 2021-12-31
  * lon      (lon) float32 1kB 0.5 1.5 2.5 3.5 4.5 ... 356.5 357.5 358.5 359.5
  * lat      (lat) float32 360B -44.5 -43.5 -42.5 -41.5 ... 41.5 42.5 43.5 44.5
Attributes:
    standard_name:  toa_outgoing_longwave_flux
    long_name:      NOAA Climate Data Record of Daily Mean Upward Longwave Fl...
    units:          W m-2
    cell_methods:   time: mean area: mean
olr_MonClim = xarray_reduce(olr, 
                            olr.time.dt.month,
                            func='nanmean',
                            isbin=False,
                            dim='time')
olr_MonClim
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[2], line 1
----> 1 olr_MonClim = xarray_reduce(olr, 
      2                             olr.time.dt.month,
      3                             func='nanmean',
      4                             isbin=False,
      5                             dim='time')
      6 olr_MonClim

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/flox/xarray.py:410, in xarray_reduce(obj, func, expected_groups, isbin, sort, dim, fill_value, dtype, method, engine, keep_attrs, skipna, min_count, reindex, *by, **finalize_kwargs)
    407 output_sizes = group_sizes
    408 output_sizes.update({dim.name: dim.size for dim in newdims if dim.size != 0})
--> 410 actual = xr.apply_ufunc(
    411     wrapper,
    412     ds_broad.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims),
    413     *by_da,
    414     input_core_dims=input_core_dims,
    415     # for xarray's test_groupby_duplicate_coordinate_labels
    416     exclude_dims=set(dim_tuple),
    417     output_core_dims=[output_core_dims],
    418     dask="allowed",
    419     dask_gufunc_kwargs=dict(
    420         output_sizes=output_sizes,
    421         output_dtypes=[dtype] if dtype is not None else None,
    422     ),
    423     keep_attrs=keep_attrs,
    424     kwargs={
    425         "func": func,
    426         "axis": axis,
    427         "sort": sort,
    428         "fill_value": fill_value,
    429         "method": method,
    430         "min_count": min_count,
    431         "skipna": skipna,
    432         "engine": engine,
    433         "reindex": reindex,
    434         "expected_groups": tuple(expected_groups_valid_list),
    435         "isbin": isbins,
    436         "finalize_kwargs": finalize_kwargs,
    437         "dtype": dtype,
    438         "core_dims": input_core_dims,
    439     },
    440 )
    442 # restore non-dim coord variables without the core dimension
    443 # TODO: shouldn't apply_ufunc handle this?
    444 for var in set(ds_broad._coord_names) - set(ds_broad._indexes) - set(ds_broad.dims):

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/computation.py:1258, in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, on_missing_core_dim, *args)
   1256 # feed datasets apply_variable_ufunc through apply_dataset_vfunc
   1257 elif any(is_dict_like(a) for a in args):
-> 1258     return apply_dataset_vfunc(
   1259         variables_vfunc,
   1260         *args,
   1261         signature=signature,
   1262         join=join,
   1263         exclude_dims=exclude_dims,
   1264         dataset_join=dataset_join,
   1265         fill_value=dataset_fill_value,
   1266         keep_attrs=keep_attrs,
   1267         on_missing_core_dim=on_missing_core_dim,
   1268     )
   1269 # feed DataArray apply_variable_ufunc through apply_dataarray_vfunc
   1270 elif any(isinstance(a, DataArray) for a in args):

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/computation.py:529, in apply_dataset_vfunc(func, signature, join, dataset_join, fill_value, exclude_dims, keep_attrs, on_missing_core_dim, *args)
    524 list_of_coords, list_of_indexes = build_output_coords_and_indexes(
    525     args, signature, exclude_dims, combine_attrs=keep_attrs
    526 )
    527 args = tuple(getattr(arg, "data_vars", arg) for arg in args)
--> 529 result_vars = apply_dict_of_variables_vfunc(
    530     func,
    531     *args,
    532     signature=signature,
    533     join=dataset_join,
    534     fill_value=fill_value,
    535     on_missing_core_dim=on_missing_core_dim,
    536 )
    538 out: Dataset | tuple[Dataset, ...]
    539 if signature.num_outputs > 1:

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/computation.py:453, in apply_dict_of_variables_vfunc(func, signature, join, fill_value, on_missing_core_dim, *args)
    451 core_dim_present = _check_core_dims(signature, variable_args, name)
    452 if core_dim_present is True:
--> 453     result_vars[name] = func(*variable_args)
    454 else:
    455     if on_missing_core_dim == "raise":

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/computation.py:733, in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, vectorize, keep_attrs, dask_gufunc_kwargs, *args)
    728 broadcast_dims = tuple(
    729     dim for dim in dim_sizes if dim not in signature.all_core_dims
    730 )
    731 output_dims = [broadcast_dims + out for out in signature.output_core_dims]
--> 733 input_data = [
    734     (
    735         broadcast_compat_data(arg, broadcast_dims, core_dims)
    736         if isinstance(arg, Variable)
    737         else arg
    738     )
    739     for arg, core_dims in zip(args, signature.input_core_dims, strict=True)
    740 ]
    742 if any(is_chunked_array(array) for array in input_data):
    743     if dask == "forbidden":

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/computation.py:735, in <listcomp>(.0)
    728 broadcast_dims = tuple(
    729     dim for dim in dim_sizes if dim not in signature.all_core_dims
    730 )
    731 output_dims = [broadcast_dims + out for out in signature.output_core_dims]
    733 input_data = [
    734     (
--> 735         broadcast_compat_data(arg, broadcast_dims, core_dims)
    736         if isinstance(arg, Variable)
    737         else arg
    738     )
    739     for arg, core_dims in zip(args, signature.input_core_dims, strict=True)
    740 ]
    742 if any(is_chunked_array(array) for array in input_data):
    743     if dask == "forbidden":

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/computation.py:656, in broadcast_compat_data(variable, broadcast_dims, core_dims)
    651 def broadcast_compat_data(
    652     variable: Variable,
    653     broadcast_dims: tuple[Hashable, ...],
    654     core_dims: tuple[Hashable, ...],
    655 ) -> Any:
--> 656     data = variable.data
    658     old_dims = variable.dims
    659     new_dims = broadcast_dims + core_dims

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/variable.py:415, in Variable.data(self)
    413     return self._data
    414 elif isinstance(self._data, indexing.ExplicitlyIndexed):
--> 415     return self._data.get_duck_array()
    416 else:
    417     return self.values

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/indexing.py:734, in LazilyVectorizedIndexedArray.get_duck_array(self)
    729 # self.array[self.key] is now a numpy array when
    730 # self.array is a BackendArray subclass
    731 # and self.key is BasicIndexer((slice(None, None, None),))
    732 # so we need the explicit check for ExplicitlyIndexed
    733 if isinstance(array, ExplicitlyIndexed):
--> 734     array = array.get_duck_array()
    735 return _wrap_numpy_scalars(array)

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/coding/variables.py:81, in _ElementwiseFunctionArray.get_duck_array(self)
     80 def get_duck_array(self):
---> 81     return self.func(self.array.get_duck_array())

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/indexing.py:728, in LazilyVectorizedIndexedArray.get_duck_array(self)
    724     array = apply_indexer(self.array, self.key)
    725 else:
    726     # If the array is not an ExplicitlyIndexedNDArrayMixin,
    727     # it may wrap a BackendArray so use its __getitem__
--> 728     array = self.array[self.key]
    729 # self.array[self.key] is now a numpy array when
    730 # self.array is a BackendArray subclass
    731 # and self.key is BasicIndexer((slice(None, None, None),))
    732 # so we need the explicit check for ExplicitlyIndexed
    733 if isinstance(array, ExplicitlyIndexed):

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:103, in NetCDF4ArrayWrapper.__getitem__(self, key)
    102 def __getitem__(self, key):
--> 103     return indexing.explicit_indexing_adapter(
    104         key, self.shape, indexing.IndexingSupport.OUTER, self._getitem
    105     )

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/indexing.py:1012, in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
    985 def explicit_indexing_adapter(
    986     key: ExplicitIndexer,
    987     shape: _Shape,
    988     indexing_support: IndexingSupport,
    989     raw_indexing_method: Callable[..., Any],
    990 ) -> Any:
    991     """Support explicit indexing by delegating to a raw indexing method.
    992 
    993     Outer and/or vectorized indexers are supported by indexing a second time
   (...)
   1010     Indexing result, in the form of a duck numpy-array.
   1011     """
-> 1012     raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
   1013     result = raw_indexing_method(raw_key.tuple)
   1014     if numpy_indices.tuple:
   1015         # index the loaded np.ndarray

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/indexing.py:1045, in decompose_indexer(indexer, shape, indexing_support)
   1041 def decompose_indexer(
   1042     indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport
   1043 ) -> tuple[ExplicitIndexer, ExplicitIndexer]:
   1044     if isinstance(indexer, VectorizedIndexer):
-> 1045         return _decompose_vectorized_indexer(indexer, shape, indexing_support)
   1046     if isinstance(indexer, BasicIndexer | OuterIndexer):
   1047         return _decompose_outer_indexer(indexer, shape, indexing_support)

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/indexing.py:1142, in _decompose_vectorized_indexer(indexer, shape, indexing_support)
   1138     np_indexer_elems.append(np_slice)
   1139 else:
   1140     # If it is a (multidimensional) np.ndarray, just pickup the used
   1141     # keys without duplication and store them as a 1d-np.ndarray.
-> 1142     oind, vind = np.unique(k, return_inverse=True)
   1143     backend_indexer_elems.append(oind)
   1144     np_indexer_elems.append(vind.reshape(*k.shape))

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/numpy/lib/_arraysetops_impl.py:291, in unique(ar, return_index, return_inverse, return_counts, axis, equal_nan)
    289 ar = np.asanyarray(ar)
    290 if axis is None:
--> 291     ret = _unique1d(ar, return_index, return_inverse, return_counts, 
    292                     equal_nan=equal_nan, inverse_shape=ar.shape, axis=None)
    293     return _unpack_tuple(ret)
    295 # axis was specified and not None

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/numpy/lib/_arraysetops_impl.py:377, in _unique1d(ar, return_index, return_inverse, return_counts, equal_nan, inverse_shape, axis)
    374     mask[1:] = aux[1:] != aux[:-1]
    376 ret = (aux[mask],)
--> 377 if return_index:
    378     ret += (perm[mask],)
    379 if return_inverse:

KeyboardInterrupt: 
  • olr: The DataArray variable for which we want to calculate the climatology.

  • olr.time.dt.month: Specifies how to group the data. Flox will group all values with the same month together.

  • func='nanmean': The statistical function applied to each group. Flox supports many functions, including:
    “all”, “any”, “count”, “sum”, “nansum”, “mean”, “nanmean”,
    “max”, “nanmax”, “min”, “nanmin”, “argmax”, “nanargmax”,
    “argmin”, “nanargmin”, “quantile”, “nanquantile”,
    “median”, “nanmedian”, “mode”, “nanmode”,
    “first”, “nanfirst”, “last”, “nanlast”

  • isbin: Determines whether to group values by predefined bins (expected_groups). In this example, we simply group by the natural values in the data (i.e., 12 months). Usage of isbin will be shown in later examples.

  • dim='time': Specifies the dimension along which to perform the aggregation (here, the time dimension).

Example 2: 1-D probability distribution. Plot the probability distribution of daily precipitation rate.

Step 1: Read data.

pcp = xr.open_dataarray('./data/cmorph_sample.nc').sel(lat=slice(-15,15),lon=slice(90,160))
pcp_djf = pcp.sel(time=pcp.time.dt.month.isin([12,1,2]))
pcp_djf
<xarray.DataArray 'cmorph' (time: 2166, lat: 120, lon: 280)> Size: 291MB
[72777600 values with dtype=float32]
Coordinates:
  * time     (time) datetime64[ns] 17kB 1998-01-01 1998-01-02 ... 2021-12-31
  * lon      (lon) float32 1kB 90.12 90.38 90.62 90.88 ... 159.4 159.6 159.9
  * lat      (lat) float32 480B -14.88 -14.62 -14.38 ... 14.38 14.62 14.88
Attributes:
    standard_name:  lwe_precipitation_rate
    long_name:      precipitation
    units:          mm/day
    ver_note:       1998-2020: V1,0; 2021: V0.x.
    comment:        !!! CMORPH estimate is rainrate !!!

Step 2: Count number of occurrence for each precipitation bin with flox. Technically, the precipitation is grouped by precipitation.

pdf = xarray_reduce(pcp_djf,
                    pcp_djf,
                    func='count',
                    isbin=True,
                    expected_groups=np.geomspace(0.5, 50, 20),
                    dim=['time','lat','lon'])
pdf_percent = pdf / pdf.sum() * 100.
pdf_percent
<xarray.DataArray 'cmorph' (cmorph_bins: 19)> Size: 152B
array([3.09464685, 4.98283472, 4.17693794, 5.09792383, 4.33013187,
       5.83681144, 5.57525959, 5.37691207, 6.11883881, 5.90911831,
       5.98125898, 6.33805064, 6.27095539, 6.25657   , 6.10623415,
       5.71656116, 5.11653848, 4.35264337, 3.36177241])
Coordinates:
  * cmorph_bins  (cmorph_bins) object 152B (0.5, 0.6371374928515668] ... (39....
  • pcp_djf is the variable being grouped — we want to group precipitation values by rain rate (the second pcp_djf).

  • func='count': Counts how many values fall into each bin (i.e., frequency of rainfall in each intensity range).

  • isbin=True: Indicates that we want to group precipitation values into bins, not by exact values.

  • expected_groups=np.geomspace(0.1, 50, 20): Defines the bin edges. Here we use logarithmic spacing (geomspace) from 0.1 mm to 50 mm, divided into 20 bins, because precipitation distribution is usually logarithmic scale.

  • dim=['time','lat','lon']: Aggregates over all dimensions (time and space), so the result is a 1D histogram of rainfall intensity.

import matplotlib as mpl
from matplotlib import pyplot as plt

mpl.rcParams['figure.dpi'] = 150

fig, ax = plt.subplots(figsize=(5,4)) 
lineplt = pdf_percent.plot.line(x='cmorph_bins', marker='o', xscale='log',ax=ax)
ax.set_ylabel('Percentage (%)')
ax.set_xlabel(r'Rain rate (mm day$^{-1}$)')
Text(0.5, 0, 'Rain rate (mm day$^{-1}$)')
_images/7fdabe7c15705a7e42cc4af911def96e0d6fd64a84192d0f6288634a5fc7f44c.png

Example 3: 2-D Probability Distribution. Plot the 2-D probability distribution of annual mean rainfall, rainfall intensity, and fraction of rainy days (> 0.5 mm day \(^{-1}\) ).

These three metrics complement each other and provide a more complete picture of rainfall characteristics:

  • Annual mean rainfall (amount): the total rainfall, but it does not reveal whether it comes from frequent light rain or infrequent heavy rain.

  • Rainy days (frequency): shows how often it rains. High rainfall can result from many rainy days or from a few very wet days.

  • Rainfall intensity (amount per rainy day): distinguishes between light and heavy rain events.

Looking at these three variables together helps us understand how the total rainfall is distributed. This is important for interpreting climate regimes and hydrological impacts.

pcp_AnnualClm = pcp.mean(axis=0).rename('Annual_mean')
pcp_intensity = xr.where(pcp==0, np.nan, pcp).mean(axis=0,skipna=True).rename('Intensity') 
                # Discard no precipitation points before averaing. 
pcp_days = xr.where(pcp>0.5, 1, 0).mean(axis=0,skipna=True).rename('Rain_days')  
           # Fraction of rainy days (> 0.5 mm day-1)
RI_Clm_pdf = xarray_reduce(pcp_intensity,
                           pcp_intensity,pcp_AnnualClm,
                           func='count',
                           dim=['lat','lon'],
                           isbin=(True,True),
                           expected_groups=(np.geomspace(1, 20, 20), np.geomspace(1, 20, 20)))
Days_Clm_pdf = xarray_reduce(pcp_days,
                             pcp_days,pcp_AnnualClm,
                             func='count',
                             dim=['lat','lon'],
                             isbin=(True,True),
                             expected_groups=(np.arange(0,1.05,0.05), np.geomspace(0.5, 50, 20)))
  • pcp_intensity (first argument): the variable passed to be reduced.

  • pcp_intensity, pcp_AnnualClm: the two variables used for grouping. Flox will jointly bin data points according to rainfall intensity and annual mean rainfall.

  • func='count': counts how many grid cells fall into each (intensity, annual rainfall) bin.

  • dim=['lat','lon']: aggregation is applied over space to shape a histogram.

  • isbin=(True,True): indicates both grouping variables are treated as binned quantities. The bins are based on

  • expected_groups: specifies the bin edges. Here both variables use log-spaced bins from 1 to 20 mm/day, which gives higher resolution at smaller values where rainfall tends to vary more strongly.

Note

The first argument pcp_intensity in this example, since func='count', is not used for its actual values**. It only provides shape and alignment for flox.

However, if the purpose is to calculate a statistic of another variable x conditioned on rainfall intensity and annual mean bins, then the first argument determines which variable is being aggregated. In that case, its values become meaningful for the computation.

Result: RI_Clm_pdf is a 2D histogram showing how often certain combinations of rainfall intensity and annual mean rainfall occur across the spatial domain. This can be normalized to a probability distribution by dividing by the total count.

RI_Clm_pdf_percent   = RI_Clm_pdf / RI_Clm_pdf.sum() * 100.
Days_Clm_pdf_percent = Days_Clm_pdf / Days_Clm_pdf.sum() * 100.     
mpl.rcParams['figure.dpi'] = 150

fig, axes = plt.subplots(1,2,figsize=(8,5))
ax = axes.flatten()

RI_Clm_plt = RI_Clm_pdf_percent.plot.pcolormesh(x='Annual_mean_bins',y='Intensity_bins',
                                                xscale='log',yscale='log',
                                                cmap='viridis_r',ax=ax[0],
                                                vmin=0, vmax=10,
                                                extend='max',
                                                cbar_kwargs={'orientation': 'horizontal', 'aspect': 30, 'label': r'[%]'})
ax[0].set_title('(a) Rain Intensity to Clim. mean',loc='left')
ax[0].set_ylabel(r'Rainfall Intensity (mm day$^{-1}$)')
ax[0].set_xlabel(r'Clim. Rain (mm day$^{-1}$)')

Day_Clm_plt = Days_Clm_pdf_percent.plot.pcolormesh(x='Annual_mean_bins',y='Rain_days_bins',
                                                   xscale='log',
                                                   cmap='viridis_r',ax=ax[1],
                                                   vmin=0, vmax=10,
                                                   extend='max',
                                                   cbar_kwargs={'orientation': 'horizontal', 'aspect': 30, 'label': r'[%]'})      
ax[1].set_title('(b) Rain Days to Clim. mean',loc='left')        
ax[1].set_ylabel('Fraction of Rainy days')
ax[1].set_xlabel(r'Clim. Rain (mm day$^{-1}$)')
                                  
Text(0.5, 0, 'Clim. Rain (mm day$^{-1}$)')
_images/91e67e8950173a6ae8f2adc3f1427c169c510c08608a93fa2726aae5018814ea.png

Example 4: Composite Mean. Plot rainfall composite mean for each MJO phase.

Step 1: Read MJO phase data from IRI data library.

import pandas as pd

# Read MJO data
mjo_ds = xr.open_dataset('http://iridl.ldeo.columbia.edu/SOURCES/.BoM/.MJO/.RMM/dods',
                         decode_times=False)

T = mjo_ds.T.values
mjo_ds['T'] = pd.date_range("1974-06-01", periods=len(T))  # Data starts from 1974-06-01

mjo_sig_phase = xr.where(mjo_ds.amplitude>=1,  mjo_ds.phase, 0).rename('mjo_phase') 
                      # Only significant (amplitude >= 1) MJO events are preserved.  
                      
mjo_slice = mjo_sig_phase.sel(T=slice("1998-01-01","2021-12-31"))
mjo_djf = mjo_slice.sel(T=mjo_slice['T'].dt.month.isin([12, 1, 2])).rename({'T':'time'})
syntax error, unexpected WORD_WORD, expecting ';' or ','
context: Attributes { T { String calendar "standard"; Int32 expires 1755820800; String standard_name "time"; Float32 pointwidth 1.0; Int32 gridtype 0; String units "julian_day"; } amplitude { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 9.99999962E35; } phase { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 999.0; } RMM1 { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 9.99999962E35; } RMM2 { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 9.99999962E35; }NC_GLOBAL { String references "Wheeler_Hendon2004"; Int32 expires 1755820800; URL Wheeler and Hendon^ (2004) Monthly Weather Review article "http://journals.ametsoc.org/doi/abs/10.1175/1520-0493(2004)132%3C1917:AARMMI%3E2.0.CO;2"; String description "Real-time Multivariate MJO Index (with components of interannual variability removed)"; URL summary from BoM "http://www.bom.gov.au/climate/mjo/"; URL data source "http://www.bom.gov.au/climate/mjo/graphics/rmm.74toRealtime.txt"; String Conventions "IRIDL";}}
Illegal attribute
context: Attributes { T { String calendar "standard"; Int32 expires 1755820800; String standard_name "time"; Float32 pointwidth 1.0; Int32 gridtype 0; String units "julian_day"; } amplitude { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 9.99999962E35; } phase { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 999.0; } RMM1 { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 9.99999962E35; } RMM2 { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 9.99999962E35; }NC_GLOBAL { String references "Wheeler_Hendon2004"; Int32 expires 1755820800; URL Wheeler and Hendon^ (2004) Monthly Weather Review article "http://journals.ametsoc.org/doi/abs/10.1175/1520-0493(2004)132%3C1917:AARMMI%3E2.0.CO;2"; String description "Real-time Multivariate MJO Index (with components of interannual variability removed)"; URL summary from BoM "http://www.bom.gov.au/climate/mjo/"; URL data source "http://www.bom.gov.au/climate/mjo/graphics/rmm.74toRealtime.txt"; String Conventions "IRIDL";}}

Step 2: Group the precipitation data based on the MJO phase.

mjo_phase_comp = xarray_reduce(pcp_djf,
                               mjo_djf,
                               func='nanmean',
                               dim='time',
                               isbin=False,
                               )
mjo_phase_comp
<xarray.DataArray 'cmorph' (mjo_phase: 9, lat: 120, lon: 280)> Size: 1MB
array([[[1.5509361 , 1.5476524 , 1.5360179 , ..., 6.1897774 ,
         6.044517  , 5.838945  ],
        [1.644101  , 1.5174888 , 1.6073403 , ..., 6.6764784 ,
         6.6883655 , 6.3710103 ],
        [1.7099406 , 1.5615454 , 1.6053046 , ..., 7.1074147 ,
         7.0514264 , 6.989376  ],
        ...,
        [1.2247697 , 1.2321248 , 1.0660772 , ..., 0.9790045 ,
         0.91200596, 0.8479941 ],
        [0.9963893 , 0.90916795, 0.7632095 , ..., 1.0414264 ,
         0.996211  , 0.9376969 ],
        [0.74106985, 0.6703715 , 0.6609807 , ..., 1.0852897 ,
         1.0059881 , 0.92294204]],

       [[2.1850576 , 2.1586208 , 2.3988507 , ..., 5.4770117 ,
         4.7149425 , 4.7413793 ],
        [2.2839081 , 2.4195402 , 2.4873564 , ..., 5.5850577 ,
         4.9080462 , 5.1747127 ],
        [2.4034483 , 3.0367815 , 3.254023  , ..., 5.462069  ,
         5.521839  , 5.8977013 ],
...
        [0.2897544 , 0.26273686, 0.29249123, ..., 0.74992985,
         0.7251579 , 0.681193  ],
        [0.24196492, 0.19701755, 0.1731579 , ..., 0.7130176 ,
         0.6249825 , 0.6207018 ],
        [0.16712281, 0.14017543, 0.15249123, ..., 0.62736845,
         0.57480705, 0.5857895 ]],

       [[1.9496454 , 1.8248227 , 1.8163121 , ..., 4.546099  ,
         4.824823  , 5.4425535 ],
        [1.9907802 , 2.1276596 , 2.283688  , ..., 4.7184396 ,
         5.012766  , 5.05461   ],
        [2.555319  , 2.703546  , 2.8758867 , ..., 5.4425535 ,
         5.6014185 , 5.35461   ],
        ...,
        [0.75886524, 0.72269505, 0.57588655, ..., 3.2198582 ,
         3.0680852 , 3.1226952 ],
        [0.8056738 , 0.6815603 , 0.55602837, ..., 3.7794328 ,
         3.487234  , 3.0744681 ],
        [0.5156028 , 0.56950355, 0.5382979 , ..., 3.7475178 ,
         3.7709222 , 3.3829787 ]]], dtype=float32)
Coordinates:
  * lon        (lon) float32 1kB 90.12 90.38 90.62 90.88 ... 159.4 159.6 159.9
  * lat        (lat) float32 480B -14.88 -14.62 -14.38 ... 14.38 14.62 14.88
  * mjo_phase  (mjo_phase) float32 36B 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0
Attributes:
    standard_name:  lwe_precipitation_rate
    long_name:      precipitation
    units:          mm/day
    ver_note:       1998-2020: V1,0; 2021: V0.x.
    comment:        !!! CMORPH estimate is rainrate !!!
  • In this example, precipitation data pcp_djf is grouped by each MJO phase mjo_djf. For each MJO phase group, func='nanmean' calculates the mean precipitation over time dimension (dim='time'). That is, all time steps belonging to the same MJO phase are averaged together.

  • isbin=False: Means we are grouping by the exact values of mjo_djf (the phase categories 1–8), not by bins.

Step 3: Plotting.

import cmaps
from cartopy import crs as ccrs   
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

mpl.rcParams['figure.dpi'] = 150

fig, axes = plt.subplots(4,2, 
                         subplot_kw={'projection': ccrs.PlateCarree()},
                         figsize=(10,11))
ax = axes.flatten()
lon_formatter = LONGITUDE_FORMATTER
lat_formatter = LATITUDE_FORMATTER

clevs = [0.5,1,2.5,5,7.5,10,15,20,25]

porder = [0,2,4,6,1,3,5,7]

for i in range(1,9): 
    cf = (mjo_phase_comp[i,:,:].plot.contourf(x='lon',y='lat', ax=ax[porder[i-1]],
                                         levels=clevs,
                                         add_colorbar=False,
                                         cmap=cmaps.precip_11lev, 
                                         extend='both',
                                         transform=ccrs.PlateCarree()))
    ax[porder[i-1]].coastlines()
    ax[porder[i-1]].set_extent([90,160,-15,16],crs=ccrs.PlateCarree())
    ax[porder[i-1]].set_xticks(np.arange(100,180,20), crs=ccrs.PlateCarree())
    ax[porder[i-1]].set_yticks(np.arange(-15,30,15), crs=ccrs.PlateCarree())   # 設定x, y座標的範圍,以及多少經緯度繪製刻度。
    ax[porder[i-1]].xaxis.set_major_formatter(lon_formatter)
    ax[porder[i-1]].yaxis.set_major_formatter(lat_formatter)
    ax[porder[i-1]].set_xlabel(' ')
    ax[porder[i-1]].set_ylabel(' ')
    ax[porder[i-1]].set_title(' ')
    ax[porder[i-1]].set_title('Phase '+str(i), loc='left')
    
# Add a colorbar axis at the bottom of the graph
cbar_ax = fig.add_axes([0.2, 0.07, 0.6, 0.015])
# Draw the colorbar 將colorbar畫在cbar_ax這個軸上。
cbar = fig.colorbar(cf, cax=cbar_ax,
                    orientation='horizontal',
                    ticks=clevs,
                    label=r'[mm hr$^{-1}$]')  

plt.subplots_adjust(hspace=0.15)
plt.suptitle('MJO Composite in DJF',y=0.92,size='large',weight='bold')
plt.show()    
_images/1c5b06aebcfb1a0429ce8a94ad091a525a76564ccc37a9760568a85cf2b3c421.png