11. flox: Advanced Tools for GroupBy and Resample

11. `flox`: Advanced Tools for GroupBy and Resample#

Flox is a groupby package designed for xarray and dask. In fact, xarray's built-in groupby uses flox under the hood when flox is installed. Calling flox directly allows more flexibility for grouping and aggregation, especially when working with large datasets or custom grouping logic. In this chapter, we will mainly introduce how to use flox with xarray objects through flox.xarray.xarray_reduce.

The syntax of flox.xarray.xarray_reduce is as follows:

flox.xarray.xarray_reduce(obj, *by, func, expected_groups=None, isbin=False, sort=True, dim=None, fill_value=None, dtype=None, method=None, engine=None, keep_attrs=True, skipna=None, min_count=None, reindex=None, **finalize_kwargs)

We will use several examples to show how to set these options.

Example 1: Calculate the OLR climatology.

olr_MonClim = xarray_reduce(olr, 
                            olr.time.dt.month,
                            func='nanmean',
                            isbin=False,
                            dim='time')
olr_MonClim

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[2], line 1
----> 1 olr_MonClim = xarray_reduce(olr, 
                           olr.time.dt.month,
                           func='nanmean',
                           isbin=False,
                           dim='time')
olr_MonClim

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/flox/xarray.py:410, in xarray_reduce(obj, func, expected_groups, isbin, sort, dim, fill_value, dtype, method, engine, keep_attrs, skipna, min_count, reindex, *by, **finalize_kwargs)
output_sizes = group_sizes
output_sizes.update({dim.name: dim.size for dim in newdims if dim.size != 0})
--> 410 actual = xr.apply_ufunc(
   wrapper,
   ds_broad.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims),
   *by_da,
   input_core_dims=input_core_dims,
   # for xarray's test_groupby_duplicate_coordinate_labels
   exclude_dims=set(dim_tuple),
   output_core_dims=[output_core_dims],
   dask="allowed",
   dask_gufunc_kwargs=dict(
       output_sizes=output_sizes,
       output_dtypes=[dtype] if dtype is not None else None,
   ),
   keep_attrs=keep_attrs,
   kwargs={
       "func": func,
       "axis": axis,
       "sort": sort,
       "fill_value": fill_value,
       "method": method,
       "min_count": min_count,
       "skipna": skipna,
       "engine": engine,
       "reindex": reindex,
       "expected_groups": tuple(expected_groups_valid_list),
       "isbin": isbins,
       "finalize_kwargs": finalize_kwargs,
       "dtype": dtype,
       "core_dims": input_core_dims,
   },
)
# restore non-dim coord variables without the core dimension
# TODO: shouldn't apply_ufunc handle this?
for var in set(ds_broad._coord_names) - set(ds_broad._indexes) - set(ds_broad.dims):

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/computation.py:1258, in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, on_missing_core_dim, *args)
# feed datasets apply_variable_ufunc through apply_dataset_vfunc
elif any(is_dict_like(a) for a in args):
-> 1258     return apply_dataset_vfunc(
       variables_vfunc,
       *args,
       signature=signature,
       join=join,
       exclude_dims=exclude_dims,
       dataset_join=dataset_join,
       fill_value=dataset_fill_value,
       keep_attrs=keep_attrs,
       on_missing_core_dim=on_missing_core_dim,
   )
# feed DataArray apply_variable_ufunc through apply_dataarray_vfunc
elif any(isinstance(a, DataArray) for a in args):

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/computation.py:529, in apply_dataset_vfunc(func, signature, join, dataset_join, fill_value, exclude_dims, keep_attrs, on_missing_core_dim, *args)
list_of_coords, list_of_indexes = build_output_coords_and_indexes(
   args, signature, exclude_dims, combine_attrs=keep_attrs
)
args = tuple(getattr(arg, "data_vars", arg) for arg in args)
--> 529 result_vars = apply_dict_of_variables_vfunc(
   func,
   *args,
   signature=signature,
   join=dataset_join,
   fill_value=fill_value,
   on_missing_core_dim=on_missing_core_dim,
)
out: Dataset | tuple[Dataset, ...]
if signature.num_outputs > 1:

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/computation.py:453, in apply_dict_of_variables_vfunc(func, signature, join, fill_value, on_missing_core_dim, *args)
core_dim_present = _check_core_dims(signature, variable_args, name)
if core_dim_present is True:
--> 453     result_vars[name] = func(*variable_args)
else:
   if on_missing_core_dim == "raise":

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/computation.py:733, in apply_variable_ufunc(func, signature, exclude_dims, dask, output_dtypes, vectorize, keep_attrs, dask_gufunc_kwargs, *args)
broadcast_dims = tuple(
   dim for dim in dim_sizes if dim not in signature.all_core_dims
)
output_dims = [broadcast_dims + out for out in signature.output_core_dims]
--> 733 input_data = [
   (
       broadcast_compat_data(arg, broadcast_dims, core_dims)
       if isinstance(arg, Variable)
       else arg
   )
   for arg, core_dims in zip(args, signature.input_core_dims, strict=True)
]
if any(is_chunked_array(array) for array in input_data):
   if dask == "forbidden":

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/computation.py:735, in <listcomp>(.0)
broadcast_dims = tuple(
   dim for dim in dim_sizes if dim not in signature.all_core_dims
)
output_dims = [broadcast_dims + out for out in signature.output_core_dims]
input_data = [
   (
--> 735         broadcast_compat_data(arg, broadcast_dims, core_dims)
       if isinstance(arg, Variable)
       else arg
   )
   for arg, core_dims in zip(args, signature.input_core_dims, strict=True)
]
if any(is_chunked_array(array) for array in input_data):
   if dask == "forbidden":

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/computation.py:656, in broadcast_compat_data(variable, broadcast_dims, core_dims)
def broadcast_compat_data(
   variable: Variable,
   broadcast_dims: tuple[Hashable, ...],
   core_dims: tuple[Hashable, ...],
) -> Any:
--> 656     data = variable.data
   old_dims = variable.dims
   new_dims = broadcast_dims + core_dims

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/variable.py:415, in Variable.data(self)
   return self._data
elif isinstance(self._data, indexing.ExplicitlyIndexed):
--> 415     return self._data.get_duck_array()
else:
   return self.values

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/indexing.py:734, in LazilyVectorizedIndexedArray.get_duck_array(self)
# self.array[self.key] is now a numpy array when
# self.array is a BackendArray subclass
# and self.key is BasicIndexer((slice(None, None, None),))
# so we need the explicit check for ExplicitlyIndexed
if isinstance(array, ExplicitlyIndexed):
--> 734     array = array.get_duck_array()
return _wrap_numpy_scalars(array)

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/coding/variables.py:81, in _ElementwiseFunctionArray.get_duck_array(self)
def get_duck_array(self):
---> 81     return self.func(self.array.get_duck_array())

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/indexing.py:728, in LazilyVectorizedIndexedArray.get_duck_array(self)
   array = apply_indexer(self.array, self.key)
else:
   # If the array is not an ExplicitlyIndexedNDArrayMixin,
   # it may wrap a BackendArray so use its __getitem__
--> 728     array = self.array[self.key]
# self.array[self.key] is now a numpy array when
# self.array is a BackendArray subclass
# and self.key is BasicIndexer((slice(None, None, None),))
# so we need the explicit check for ExplicitlyIndexed
if isinstance(array, ExplicitlyIndexed):

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/backends/netCDF4_.py:103, in NetCDF4ArrayWrapper.__getitem__(self, key)
def __getitem__(self, key):
--> 103     return indexing.explicit_indexing_adapter(
       key, self.shape, indexing.IndexingSupport.OUTER, self._getitem
   )

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/indexing.py:1012, in explicit_indexing_adapter(key, shape, indexing_support, raw_indexing_method)
def explicit_indexing_adapter(
   key: ExplicitIndexer,
   shape: _Shape,
   indexing_support: IndexingSupport,
   raw_indexing_method: Callable[..., Any],
) -> Any:
   """Support explicit indexing by delegating to a raw indexing method.

   Outer and/or vectorized indexers are supported by indexing a second time
   (...)
   Indexing result, in the form of a duck numpy-array.
   """
-> 1012     raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support)
   result = raw_indexing_method(raw_key.tuple)
   if numpy_indices.tuple:
       # index the loaded np.ndarray

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/indexing.py:1045, in decompose_indexer(indexer, shape, indexing_support)
def decompose_indexer(
   indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport
) -> tuple[ExplicitIndexer, ExplicitIndexer]:
   if isinstance(indexer, VectorizedIndexer):
-> 1045         return _decompose_vectorized_indexer(indexer, shape, indexing_support)
   if isinstance(indexer, BasicIndexer | OuterIndexer):
       return _decompose_outer_indexer(indexer, shape, indexing_support)

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/xarray/core/indexing.py:1142, in _decompose_vectorized_indexer(indexer, shape, indexing_support)
   np_indexer_elems.append(np_slice)
else:
   # If it is a (multidimensional) np.ndarray, just pickup the used
   # keys without duplication and store them as a 1d-np.ndarray.
-> 1142     oind, vind = np.unique(k, return_inverse=True)
   backend_indexer_elems.append(oind)
   np_indexer_elems.append(vind.reshape(*k.shape))

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/numpy/lib/_arraysetops_impl.py:291, in unique(ar, return_index, return_inverse, return_counts, axis, equal_nan)
ar = np.asanyarray(ar)
if axis is None:
--> 291     ret = _unique1d(ar, return_index, return_inverse, return_counts, 
                   equal_nan=equal_nan, inverse_shape=ar.shape, axis=None)
   return _unpack_tuple(ret)
# axis was specified and not None

File /data/wtsai/micromamba/p3t/lib/python3.10/site-packages/numpy/lib/_arraysetops_impl.py:377, in _unique1d(ar, return_index, return_inverse, return_counts, equal_nan, inverse_shape, axis)
   mask[1:] = aux[1:] != aux[:-1]
ret = (aux[mask],)
--> 377 if return_index:
   ret += (perm[mask],)
if return_inverse:

KeyboardInterrupt: 

olr: The DataArray variable for which we want to calculate the climatology.
olr.time.dt.month: Specifies how to group the data. Flox will group all values with the same month together.
func='nanmean': The statistical function applied to each group. Flox supports many functions, including:
“all”, “any”, “count”, “sum”, “nansum”, “mean”, “nanmean”,
“max”, “nanmax”, “min”, “nanmin”, “argmax”, “nanargmax”,
“argmin”, “nanargmin”, “quantile”, “nanquantile”,
“median”, “nanmedian”, “mode”, “nanmode”,
“first”, “nanfirst”, “last”, “nanlast”
isbin: Determines whether to group values by predefined bins (expected_groups). In this example, we simply group by the natural values in the data (i.e., 12 months). Usage of isbin will be shown in later examples.
dim='time': Specifies the dimension along which to perform the aggregation (here, the time dimension).

Example 2: 1-D probability distribution. Plot the probability distribution of daily precipitation rate.

Step 1: Read data.

Step 2: Count the number of occurrences in each precipitation bin with flox. Technically, the precipitation data are grouped by their own values: precipitation is both the variable being reduced and the grouping variable.

pdf = xarray_reduce(pcp_djf,
                    pcp_djf,
                    func='count',
                    isbin=True,
                    expected_groups=np.geomspace(0.5, 50, 20),
                    dim=['time','lat','lon'])
pdf_percent = pdf / pdf.sum() * 100.
pdf_percent

<xarray.DataArray 'cmorph' (cmorph_bins: 19)> Size: 152B
array([3.09464685, 4.98283472, 4.17693794, 5.09792383, 4.33013187,
       5.83681144, 5.57525959, 5.37691207, 6.11883881, 5.90911831,
       5.98125898, 6.33805064, 6.27095539, 6.25657   , 6.10623415,
       5.71656116, 5.11653848, 4.35264337, 3.36177241])
Coordinates:
  * cmorph_bins  (cmorph_bins) object 152B (0.5, 0.6371374928515668] ... (39....

xarray.DataArray

'cmorph'

cmorph_bins: 19

3.095 4.983 4.177 5.098 4.33 5.837 ... 6.106 5.717 5.117 4.353 3.362

array([3.09464685, 4.98283472, 4.17693794, 5.09792383, 4.33013187,
       5.83681144, 5.57525959, 5.37691207, 6.11883881, 5.90911831,
       5.98125898, 6.33805064, 6.27095539, 6.25657   , 6.10623415,
       5.71656116, 5.11653848, 4.35264337, 3.36177241])

Coordinates: (1)

cmorph_bins

(cmorph_bins)

object

(0.5, 0.6371374928515668] ... (3...

standard_name :: lwe_precipitation_rate
long_name :: precipitation
units :: mm/day
ver_note :: 1998-2020: V1,0; 2021: V0.x.
comment :: !!! CMORPH estimate is rainrate !!!

array([Interval(0.5, 0.6371374928515668, closed='right'),
       Interval(0.6371374928515668, 0.8118883695943608, closed='right'),
       Interval(0.8118883695943608, 1.0345690405573948, closed='right'),
       Interval(1.0345690405573948, 1.318325449365179, closed='right'),
       Interval(1.318325449365179, 1.6799091431418909, closed='right'),
       Interval(1.6799091431418909, 2.1406661993596967, closed='right'),
       Interval(2.1406661993596967, 2.7277973905842594, closed='right'),
       Interval(2.7277973905842594, 3.4759639808878022, closed='right'),
       Interval(3.4759639808878022, 4.429333952050413, closed='right'),
       Interval(4.429333952050413, 5.644189458423444, closed='right'),
       Interval(5.644189458423444, 7.192249441438314, closed='right'),
       Interval(7.192249441438314, 9.164903554162176, closed='right'),
       Interval(9.164903554162176, 11.678607345450605, closed='right'),
       Interval(11.678607345450605, 14.881757208156586, closed='right'),
       Interval(14.881757208156586, 18.963450953661248, closed='right'),
       Interval(18.963450953661248, 24.164651192858756, closed='right'),
       Interval(24.164651192858756, 30.7924105533013, closed='right'),
       Interval(30.7924105533013, 39.23799851757305, closed='right'),
       Interval(39.23799851757305, 50.0, closed='right')], dtype=object)

Indexes: (1)

cmorph_bins

PandasIndex

PandasIndex(Index([               (0.5, 0.6371374928515668],
       (0.6371374928515668, 0.8118883695943608],
       (0.8118883695943608, 1.0345690405573948],
        (1.0345690405573948, 1.318325449365179],
        (1.318325449365179, 1.6799091431418909],
       (1.6799091431418909, 2.1406661993596967],
       (2.1406661993596967, 2.7277973905842594],
       (2.7277973905842594, 3.4759639808878022],
        (3.4759639808878022, 4.429333952050413],
         (4.429333952050413, 5.644189458423444],
         (5.644189458423444, 7.192249441438314],
         (7.192249441438314, 9.164903554162176],
        (9.164903554162176, 11.678607345450605],
       (11.678607345450605, 14.881757208156586],
       (14.881757208156586, 18.963450953661248],
       (18.963450953661248, 24.164651192858756],
         (24.164651192858756, 30.7924105533013],
          (30.7924105533013, 39.23799851757305],
                      (39.23799851757305, 50.0]],
      dtype='object', name='cmorph_bins'))

Attributes: (0)

pcp_djf (first argument) is the variable being reduced, and pcp_djf (second argument) is the grouping variable — we want to group precipitation values by their own rain rate.
func='count': Counts how many values fall into each bin (i.e., frequency of rainfall in each intensity range).
isbin=True: Indicates that we want to group precipitation values into bins, not by exact values.
expected_groups=np.geomspace(0.5, 50, 20): Defines the bin edges. Here we use logarithmic spacing (geomspace) from 0.5 to 50 mm/day with 20 edges, which gives 19 bins, because precipitation is usually distributed on a logarithmic scale.
dim=['time','lat','lon']: Aggregates over all dimensions (time and space), so the result is a 1D histogram of rainfall intensity.

import matplotlib as mpl
from matplotlib import pyplot as plt

mpl.rcParams['figure.dpi'] = 150

fig, ax = plt.subplots(figsize=(5,4)) 
lineplt = pdf_percent.plot.line(x='cmorph_bins', marker='o', xscale='log',ax=ax)
ax.set_ylabel('Percentage (%)')
ax.set_xlabel(r'Rain rate (mm day$^{-1}$)')

Text(0.5, 0, 'Rain rate (mm day$^{-1}$)')

_images/7fdabe7c15705a7e42cc4af911def96e0d6fd64a84192d0f6288634a5fc7f44c.png

Example 3: 2-D Probability Distribution. Plot the 2-D probability distributions of annual mean rainfall against rainfall intensity and against the fraction of rainy days (> 0.5 mm day\(^{-1}\)).

These three metrics complement each other and provide a more complete picture of rainfall characteristics:

Annual mean rainfall (amount): the total rainfall, but it does not reveal whether it comes from frequent light rain or infrequent heavy rain.
Rainy days (frequency): shows how often it rains. High rainfall can result from many rainy days or from a few very wet days.
Rainfall intensity (amount per rainy day): distinguishes between light and heavy rain events.

Looking at these three variables together helps us understand how the total rainfall is distributed. This is important for interpreting climate regimes and hydrological impacts.

pcp_AnnualClm = pcp.mean(axis=0).rename('Annual_mean')
pcp_intensity = xr.where(pcp==0, np.nan, pcp).mean(axis=0,skipna=True).rename('Intensity') 
                # Discard points with no precipitation before averaging.
pcp_days = xr.where(pcp>0.5, 1, 0).mean(axis=0,skipna=True).rename('Rain_days')  
           # Fraction of rainy days (> 0.5 mm day-1)

RI_Clm_pdf = xarray_reduce(pcp_intensity,
                           pcp_intensity,pcp_AnnualClm,
                           func='count',
                           dim=['lat','lon'],
                           isbin=(True,True),
                           expected_groups=(np.geomspace(1, 20, 20), np.geomspace(1, 20, 20)))
Days_Clm_pdf = xarray_reduce(pcp_days,
                             pcp_days,pcp_AnnualClm,
                             func='count',
                             dim=['lat','lon'],
                             isbin=(True,True),
                             expected_groups=(np.arange(0,1.05,0.05), np.geomspace(0.5, 50, 20)))

pcp_intensity (first argument): the variable passed to be reduced.
pcp_intensity, pcp_AnnualClm: the two variables used for grouping. Flox will jointly bin data points according to rainfall intensity and annual mean rainfall.
func='count': counts how many grid cells fall into each (intensity, annual rainfall) bin.
dim=['lat','lon']: aggregation is applied over the spatial dimensions to build a histogram.
isbin=(True,True): indicates both grouping variables are treated as binned quantities. The bin edges are given by expected_groups.
expected_groups: specifies the bin edges. Here both variables use log-spaced bins from 1 to 20 mm/day, which gives higher resolution at smaller values where rainfall tends to vary more strongly.

Note

Since func='count' in this example, the actual values of the first argument pcp_intensity are not used; it only provides the shape and alignment for flox (only its non-missing entries are counted).

However, if the purpose is to calculate a statistic of another variable x conditioned on rainfall intensity and annual mean bins, then the first argument determines which variable is being aggregated. In that case, its values become meaningful for the computation.

Result: RI_Clm_pdf is a 2D histogram showing how often certain combinations of rainfall intensity and annual mean rainfall occur across the spatial domain. This can be normalized to a probability distribution by dividing by the total count.

RI_Clm_pdf_percent   = RI_Clm_pdf / RI_Clm_pdf.sum() * 100.
Days_Clm_pdf_percent = Days_Clm_pdf / Days_Clm_pdf.sum() * 100.     

mpl.rcParams['figure.dpi'] = 150

fig, axes = plt.subplots(1,2,figsize=(8,5))
ax = axes.flatten()

RI_Clm_plt = RI_Clm_pdf_percent.plot.pcolormesh(x='Annual_mean_bins',y='Intensity_bins',
                                                xscale='log',yscale='log',
                                                cmap='viridis_r',ax=ax[0],
                                                vmin=0, vmax=10,
                                                extend='max',
                                                cbar_kwargs={'orientation': 'horizontal', 'aspect': 30, 'label': r'[%]'})
ax[0].set_title('(a) Rain Intensity to Clim. mean',loc='left')
ax[0].set_ylabel(r'Rainfall Intensity (mm day$^{-1}$)')
ax[0].set_xlabel(r'Clim. Rain (mm day$^{-1}$)')

Day_Clm_plt = Days_Clm_pdf_percent.plot.pcolormesh(x='Annual_mean_bins',y='Rain_days_bins',
                                                   xscale='log',
                                                   cmap='viridis_r',ax=ax[1],
                                                   vmin=0, vmax=10,
                                                   extend='max',
                                                   cbar_kwargs={'orientation': 'horizontal', 'aspect': 30, 'label': r'[%]'})      
ax[1].set_title('(b) Rain Days to Clim. mean',loc='left')        
ax[1].set_ylabel('Fraction of Rainy days')
ax[1].set_xlabel(r'Clim. Rain (mm day$^{-1}$)')
                                  

Text(0.5, 0, 'Clim. Rain (mm day$^{-1}$)')

_images/91e67e8950173a6ae8f2adc3f1427c169c510c08608a93fa2726aae5018814ea.png

Example 4: Composite Mean. Plot rainfall composite mean for each MJO phase.

Step 1: Read MJO phase data from IRI data library.

import pandas as pd

# Read MJO data
mjo_ds = xr.open_dataset('http://iridl.ldeo.columbia.edu/SOURCES/.BoM/.MJO/.RMM/dods',
                         decode_times=False)

T = mjo_ds.T.values
mjo_ds['T'] = pd.date_range("1974-06-01", periods=len(T))  # Data starts from 1974-06-01

mjo_sig_phase = xr.where(mjo_ds.amplitude>=1,  mjo_ds.phase, 0).rename('mjo_phase') 
                      # Only significant (amplitude >= 1) MJO events are preserved.  
                      
mjo_slice = mjo_sig_phase.sel(T=slice("1998-01-01","2021-12-31"))
mjo_djf = mjo_slice.sel(T=mjo_slice['T'].dt.month.isin([12, 1, 2])).rename({'T':'time'})

syntax error, unexpected WORD_WORD, expecting ';' or ','
context: Attributes { T { String calendar "standard"; Int32 expires 1755820800; String standard_name "time"; Float32 pointwidth 1.0; Int32 gridtype 0; String units "julian_day"; } amplitude { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 9.99999962E35; } phase { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 999.0; } RMM1 { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 9.99999962E35; } RMM2 { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 9.99999962E35; }NC_GLOBAL { String references "Wheeler_Hendon2004"; Int32 expires 1755820800; URL Wheeler and Hendon^ (2004) Monthly Weather Review article "http://journals.ametsoc.org/doi/abs/10.1175/1520-0493(2004)132%3C1917:AARMMI%3E2.0.CO;2"; String description "Real-time Multivariate MJO Index (with components of interannual variability removed)"; URL summary from BoM "http://www.bom.gov.au/climate/mjo/"; URL data source "http://www.bom.gov.au/climate/mjo/graphics/rmm.74toRealtime.txt"; String Conventions "IRIDL";}}
Illegal attribute
context: Attributes { T { String calendar "standard"; Int32 expires 1755820800; String standard_name "time"; Float32 pointwidth 1.0; Int32 gridtype 0; String units "julian_day"; } amplitude { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 9.99999962E35; } phase { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 999.0; } RMM1 { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 9.99999962E35; } RMM2 { Int32 expires 1755820800; String units "unitless"; Float32 missing_value 9.99999962E35; }NC_GLOBAL { String references "Wheeler_Hendon2004"; Int32 expires 1755820800; URL Wheeler and Hendon^ (2004) Monthly Weather Review article "http://journals.ametsoc.org/doi/abs/10.1175/1520-0493(2004)132%3C1917:AARMMI%3E2.0.CO;2"; String description "Real-time Multivariate MJO Index (with components of interannual variability removed)"; URL summary from BoM "http://www.bom.gov.au/climate/mjo/"; URL data source "http://www.bom.gov.au/climate/mjo/graphics/rmm.74toRealtime.txt"; String Conventions "IRIDL";}}

Step 2: Group the precipitation data based on the MJO phase.

In this example, the precipitation data pcp_djf is grouped by the MJO phase mjo_djf, e.g. mjo_phase_comp = xarray_reduce(pcp_djf, mjo_djf, func='nanmean', dim='time', isbin=False). For each MJO phase group, func='nanmean' calculates the mean precipitation over the time dimension (dim='time'). That is, all time steps belonging to the same MJO phase are averaged together.
isbin=False: Means we are grouping by the exact values of mjo_djf (0 for days without a significant MJO event, and the phase categories 1–8), not by bins.

Step 3: Plotting.

import cmaps
from cartopy import crs as ccrs   
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

mpl.rcParams['figure.dpi'] = 150

fig, axes = plt.subplots(4,2, 
                         subplot_kw={'projection': ccrs.PlateCarree()},
                         figsize=(10,11))
ax = axes.flatten()
lon_formatter = LONGITUDE_FORMATTER
lat_formatter = LATITUDE_FORMATTER

clevs = [0.5,1,2.5,5,7.5,10,15,20,25]

porder = [0,2,4,6,1,3,5,7]

for i in range(1,9): 
    cf = (mjo_phase_comp[i,:,:].plot.contourf(x='lon',y='lat', ax=ax[porder[i-1]],
                                         levels=clevs,
                                         add_colorbar=False,
                                         cmap=cmaps.precip_11lev, 
                                         extend='both',
                                         transform=ccrs.PlateCarree()))
    ax[porder[i-1]].coastlines()
    ax[porder[i-1]].set_extent([90,160,-15,16],crs=ccrs.PlateCarree())
    ax[porder[i-1]].set_xticks(np.arange(100,180,20), crs=ccrs.PlateCarree())
    ax[porder[i-1]].set_yticks(np.arange(-15,30,15), crs=ccrs.PlateCarree())   # Set the x/y tick ranges and the longitude/latitude interval at which ticks are drawn.
    ax[porder[i-1]].xaxis.set_major_formatter(lon_formatter)
    ax[porder[i-1]].yaxis.set_major_formatter(lat_formatter)
    ax[porder[i-1]].set_xlabel(' ')
    ax[porder[i-1]].set_ylabel(' ')
    ax[porder[i-1]].set_title(' ')
    ax[porder[i-1]].set_title('Phase '+str(i), loc='left')
    
# Add a colorbar axis at the bottom of the graph
cbar_ax = fig.add_axes([0.2, 0.07, 0.6, 0.015])
# Draw the colorbar on the cbar_ax axes.
cbar = fig.colorbar(cf, cax=cbar_ax,
                    orientation='horizontal',
                    ticks=clevs,
                    label=r'[mm hr$^{-1}$]')  

plt.subplots_adjust(hspace=0.15)
plt.suptitle('MJO Composite in DJF',y=0.92,size='large',weight='bold')
plt.show()    

_images/1c5b06aebcfb1a0429ce8a94ad091a525a76564ccc37a9760568a85cf2b3c421.png

11. flox: Advanced Tools for GroupBy and Resample

11. flox: Advanced Tools for GroupBy and Resample#

11. `flox`: Advanced Tools for GroupBy and Resample#