"""Encoders and decoders for CF conventions not implemented by Xarray."""importnumpyasnpimportpandasaspdimportxarrayasxr
def encode_multi_index_as_compress(ds, idxnames=None):
    """
    Encode a MultiIndexed dimension using the "compression by gathering" CF convention.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with at least one MultiIndexed dimension.
    idxnames : hashable or iterable of hashable, optional
        Dimensions that are MultiIndex-ed. If None, will detect all
        MultiIndex-ed dimensions.

    Returns
    -------
    xarray.Dataset
        Encoded Dataset with ``name`` as an integer coordinate with a
        ``"compress"`` attribute.

    Raises
    ------
    ValueError
        If no MultiIndex-ed dimension is detected or provided, or if a
        dimension to encode already has a ``"compress"`` entry in its
        ``attrs`` or ``encoding``.

    References
    ----------
    CF conventions on
    `compression by gathering <http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#compression-by-gathering>`_
    """
    if idxnames is None:
        idxnames = tuple(
            name
            for name, idx in ds.indexes.items()
            if isinstance(idx, pd.MultiIndex)
            # After the flexible indexes refactor, all MultiIndex Levels
            # have a MultiIndex but the name won't match.
            # Prior to that refactor, there is only a single MultiIndex with name=None
            and (idx.name is None or idx.name == name)
        )
    elif isinstance(idxnames, str) or not np.iterable(idxnames):
        # A single name (string or any other lone hashable).
        idxnames = (idxnames,)
    else:
        # Materialise once: the names are tested for emptiness, handed to
        # ``reset_index`` and then looped over, so a one-shot iterator would
        # be exhausted (and always truthy) before the loop below runs.
        idxnames = tuple(idxnames)

    if not idxnames:
        raise ValueError("No MultiIndex-ed dimensions found in Dataset.")

    encoded = ds.reset_index(idxnames)
    for idxname in idxnames:
        # "compress" is generated here, so refuse a pre-existing one.  The
        # check is made on the source variable: the replacement variable
        # assigned below is built from a bare array.
        source = ds[idxname]
        if "compress" in source.encoding or "compress" in source.attrs:
            raise ValueError(
                f"Does not support the 'compress' attribute in {idxname}.encoding or {idxname}.attrs. "
                "This is generated automatically."
            )

        mindex = ds.indexes[idxname]

        # Each level becomes a plain coordinate holding its unique values.
        coords = dict(zip(mindex.names, mindex.levels, strict=False))
        encoded.update(coords)
        for c in coords:
            encoded[c].attrs = ds[c].attrs
            encoded[c].encoding = ds[c].encoding

        # Collapse the per-level integer codes into one flat index over the
        # grid whose shape is given by the number of values in each level.
        encoded[idxname] = np.ravel_multi_index(mindex.codes, mindex.levshape)

        attrs = source.attrs.copy()
        attrs["compress"] = " ".join(mindex.names)
        encoded[idxname].attrs = attrs
    return encoded
def decode_compress_to_multi_index(encoded, idxnames=None):
    """
    Decode a compressed variable to a pandas MultiIndex.

    Parameters
    ----------
    encoded : xarray.Dataset
        Encoded Dataset with variables that use "compression by gathering".
    idxnames : hashable or iterable of hashable, optional
        Variable names that represents a compressed dimension. These variables
        must have the attribute ``"compress"``. If None, will detect all
        indexes with a ``"compress"`` attribute and decode those.

    Returns
    -------
    xarray.Dataset
        Decoded Dataset with ``name`` as a MultiIndexed dimension.

    Raises
    ------
    ValueError
        If ``encoded`` is not a Dataset, or if one of the requested variables
        has no ``"compress"`` attribute.

    References
    ----------
    CF conventions on
    `compression by gathering <http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#compression-by-gathering>`_
    """
    # Validate first: everything below uses Dataset-only attributes, so a
    # late check would be preceded by an unrelated AttributeError.
    if not isinstance(encoded, xr.Dataset):
        raise ValueError(f"Must provide a Dataset. Received {type(encoded)} instead.")

    decoded = xr.Dataset(data_vars=encoded.data_vars, attrs=encoded.attrs.copy())
    if idxnames is None:
        idxnames = tuple(
            name for name in encoded.indexes if "compress" in encoded[name].attrs
        )
    elif isinstance(idxnames, str) or not np.iterable(idxnames):
        # A single name (string or any other lone hashable).
        idxnames = (idxnames,)
    else:
        idxnames = tuple(idxnames)

    # Only the import decides which code path is used; keep the ``try`` that
    # narrow so an ImportError raised elsewhere is not silently rerouted.
    try:
        from xarray.indexes import PandasMultiIndex
    except ImportError:
        PandasMultiIndex = None

    for idxname in idxnames:
        if "compress" not in encoded[idxname].attrs:
            raise ValueError("Attribute 'compress' not found in provided Dataset.")

        # "compress" is a blank-separated list of the gathered dimensions;
        # ``split()`` tolerates repeated or surrounding blanks.
        names = encoded[idxname].attrs["compress"].split()
        shape = [encoded.sizes[dim] for dim in names]

        # Expand the flat index back into one integer position per dimension.
        indices = np.unravel_index(encoded[idxname].data, shape)

        if PandasMultiIndex is not None:
            # Pick, for every level, its values at the decoded positions and
            # lay them out along the compressed dimension.
            variables = {
                dim: encoded[dim].isel({dim: xr.Variable(data=index, dims=idxname)})
                for dim, index in zip(names, indices, strict=False)
            }
            decoded = decoded.assign_coords(variables).set_xindex(
                names, PandasMultiIndex
            )
        else:
            # Fallback when ``xarray.indexes.PandasMultiIndex`` cannot be
            # imported: build the pandas MultiIndex directly.
            arrays = [
                encoded[dim].data[index]
                for dim, index in zip(names, indices, strict=False)
            ]
            mindex = pd.MultiIndex.from_arrays(arrays, names=names)
            decoded.coords[idxname] = mindex

        decoded[idxname].attrs = encoded[idxname].attrs.copy()
        for coord in names:
            variable = encoded.variables[coord]
            decoded[coord].attrs = variable.attrs.copy()
            decoded[coord].encoding = variable.encoding.copy()
        # The attribute only describes the encoded form.
        del decoded[idxname].attrs["compress"]

    return decoded