Source code for cf_xarray.geometry

from __future__ import annotations

import copy
from collections import ChainMap
from collections.abc import Hashable, Sequence
from dataclasses import dataclass
from typing import TYPE_CHECKING

import numpy as np
import pandas as pd
import xarray as xr
from numpy.typing import ArrayLike

GEOMETRY_CONTAINER_NAME = "geometry_container"
FEATURES_DIM_NAME = "features"

__all__ = [
    "decode_geometries",
    "encode_geometries",
    "cf_to_shapely",
    "shapely_to_cf",
]


if TYPE_CHECKING:
    from shapely import MultiPoint, Point

# Useful convention language:
# 1. Whether linked to normal CF space-time coordinates with a nodes attribute or not, inclusion of such coordinates is
#    recommended to maintain backward compatibility with software that has not implemented geometry capabilities.
# 2. The geometry node coordinate variables must each have an axis attribute whose allowable values are X, Y, and Z.
# 3. If a coordinates attribute is carried by the geometry container variable or its parent data variable, then those coordinate variables
#    that have a meaningful correspondence with node coordinates are indicated as such by a nodes attribute that names the corresponding node
#    coordinates, but only if the grid_mapping associated with the geometry node variables is the same as that of the coordinate variables.
#    If a different grid mapping is used, then the provided coordinates must not have the nodes attribute.
#
# Interpretation:
# 1. node coordinates are exact; the 'normal' coordinates are a reasonable value to use, if you do not know how to interpret the nodes.



[docs]
@dataclass
class GeometryNames:
    """Helper class to ease handling of all the variable names needed for CF geometries.

    Parameters
    ----------
    suffix : str, optional
        String appended to the default variable and dimension names
        (``"node"``, ``"node_count"``, ``"x"``, ``"y"``, ``"crd_x"``, ...).
    grid_mapping_name : str, optional
        CF ``grid_mapping_name``. It determines the attributes attached to the X and Y
        variables; for ``"latitude_longitude"`` and ``"rotated_latitude_longitude"``
        the nominal coordinates are additionally named ``"lon"`` and ``"lat"``
        (without ``suffix``).
    grid_mapping : str, optional
        Name of the grid mapping variable. When given, it is recorded as a
        ``grid_mapping`` attribute on the X and Y variables and on the geometry container.
    """

    def __init__(
        self,
        suffix: str = "",
        grid_mapping_name: str | None = None,
        grid_mapping: str | None = None,
    ):
        self.container_name: str = GEOMETRY_CONTAINER_NAME + suffix
        self.node_dim: str = "node" + suffix
        self.node_count: str = "node_count" + suffix
        self.node_coordinates_x: str = "x" + suffix
        self.node_coordinates_y: str = "y" + suffix
        self.coordinates_x: str = "crd_x" + suffix
        self.coordinates_y: str = "crd_y" + suffix
        self.part_node_count: str = "part_node_count" + suffix
        self.part_dim: str = "part" + suffix
        self.interior_ring: str = "interior_ring" + suffix
        self.attrs_x: dict[str, str] = {}
        self.attrs_y: dict[str, str] = {}
        self.grid_mapping_attr = {"grid_mapping": grid_mapping} if grid_mapping else {}

        # Special treatment of selected grid mappings
        if grid_mapping_name in ["latitude_longitude", "rotated_latitude_longitude"]:
            # Special case for longitude_latitude type grid mappings
            self.coordinates_x = "lon"
            self.coordinates_y = "lat"
            if grid_mapping_name == "latitude_longitude":
                self.attrs_x = dict(units="degrees_east", standard_name="longitude")
                self.attrs_y = dict(units="degrees_north", standard_name="latitude")
            elif grid_mapping_name == "rotated_latitude_longitude":
                self.attrs_x = dict(
                    units="degrees_east", standard_name="grid_longitude"
                )
                self.attrs_y = dict(
                    units="degrees_north", standard_name="grid_latitude"
                )
        elif grid_mapping_name is not None:
            # Any other named grid mapping is described with projection coordinates.
            self.attrs_x = dict(standard_name="projection_x_coordinate")
            self.attrs_y = dict(standard_name="projection_y_coordinate")
        self.attrs_x.update(self.grid_mapping_attr)
        self.attrs_y.update(self.grid_mapping_attr)

    @property
    def geometry_container_attrs(self) -> dict[str, str]:
        """Attributes of the geometry container variable.

        A new dict is built on every access, so callers may modify the result.
        """
        return {
            "node_count": self.node_count,
            "node_coordinates": f"{self.node_coordinates_x} {self.node_coordinates_y}",
            "coordinates": f"{self.coordinates_x} {self.coordinates_y}",
            **self.grid_mapping_attr,
        }

    def coords(
        self,
        *,
        dim: Hashable,
        x: ArrayLike,
        y: ArrayLike,
        crdX: ArrayLike | None = None,
        crdY: ArrayLike | None = None,
    ) -> dict[str, xr.DataArray]:
        """
        Construct coordinate DataArrays for the numpy data (x, y, crdX, crdY)

        Parameters
        ----------
        dim: Hashable
            Name of the dimension of the nominal coordinates ``crdX`` and ``crdY``.
        x: array
            Node coordinates for X coordinate
        y: array
            Node coordinates for Y coordinate
        crdX: array, optional
            Nominal X coordinate
        crdY: array, optional
            Nominal Y coordinate

        Returns
        -------
        dict
            Maps variable names to DataArrays. The node coordinates are always
            present; a nominal coordinate is only included when it was provided.
        """
        mapping = {
            self.node_coordinates_x: xr.DataArray(
                x, dims=self.node_dim, attrs={"axis": "X", **self.attrs_x}
            ),
            self.node_coordinates_y: xr.DataArray(
                y, dims=self.node_dim, attrs={"axis": "Y", **self.attrs_y}
            ),
        }
        if crdX is not None:
            # The "nodes" attribute links the nominal coordinate to its node coordinate.
            mapping[self.coordinates_x] = xr.DataArray(
                crdX,
                dims=(dim,),
                attrs={"nodes": self.node_coordinates_x, **self.attrs_x},
            )
        if crdY is not None:
            mapping[self.coordinates_y] = xr.DataArray(
                crdY,
                dims=(dim,),
                attrs={"nodes": self.node_coordinates_y, **self.attrs_y},
            )
        return mapping




def _assert_single_geometry_container(ds: xr.Dataset) -> Hashable:
    """
    Return the name of the only geometry container variable in ``ds``.

    Parameters
    ----------
    ds : Dataset
        Dataset expected to hold exactly one geometry container.

    Returns
    -------
    Hashable
        Name of the geometry container variable.

    Raises
    ------
    ValueError
        If ``ds`` holds no geometry container, or more than one.
    """
    container_names = _get_geometry_containers(ds)
    if not container_names:
        # Without this check the unpacking below fails with an opaque
        # "not enough values to unpack" ValueError.
        raise ValueError(
            "No geometry container variables detected, none of the provided variables "
            "have a `geometry_type` attribute."
        )
    if len(container_names) > 1:
        raise ValueError(
            "Only one geometry container is supported by cf_to_points. "
            "To handle multiple geometries use `decode_geometries` instead."
        )
    (container_name,) = container_names
    return container_name


def _get_geometry_containers(obj: xr.DataArray | xr.Dataset) -> list[Hashable]:
    """
    Find the geometry container variables of an xarray object.

    A variable is considered a geometry container when ``geometry_type`` is present
    in its ``attrs`` or in its ``encoding``.

    Parameters
    ----------
    obj : DataArray, Dataset
        Object whose variables are inspected.

    Returns
    -------
    list of Hashable
        Names of the geometry container variables, in the order in which the
        variables are stored in ``obj``.
    """
    if isinstance(obj, xr.DataArray):
        obj = obj._to_temp_dataset()

    # Variable names are unique, so a list needs no de-duplication and, unlike a set,
    # keeps a reproducible order.
    return [
        name
        for name, var in obj._variables.items()
        if "geometry_type" in ChainMap(var.attrs, var.encoding)
    ]



[docs]
def decode_geometries(encoded: xr.Dataset) -> xr.Dataset:
    """
    Decode CF encoded geometries to numpy object arrays containing shapely geometries.

    Every geometry container found in ``encoded`` is converted with `cf_to_shapely`.
    The container and the variables named by its ``interior_ring``, ``node_coordinates``,
    ``node_count``, ``part_node_count`` and ``coordinates`` attributes are left out of
    the result. The decoded variable is named after the container's ``variable_name``
    attribute; when that attribute is missing, the name of the DataArray returned by
    `cf_to_shapely` is used instead.

    Parameters
    ----------
    encoded : Dataset
        A Xarray Dataset containing encoded geometries.

    Returns
    -------
    Dataset
        A Xarray Dataset containing decoded geometries.

    Raises
    ------
    NotImplementedError
        If none of the variables of ``encoded`` has a ``geometry_type`` attribute.

    See Also
    --------
    shapely_to_cf
    cf_to_shapely
    encode_geometries
    """

    containers = _get_geometry_containers(encoded)
    if not containers:
        raise NotImplementedError(
            "No geometry container variables detected, none of the provided variables "
            "have a `geometry_type` attribute."
        )

    # Container attributes whose values are names of variables that only exist to
    # describe the encoded geometries.
    linked_attrs = [
        "interior_ring",
        "node_coordinates",
        "node_count",
        "part_node_count",
        "coordinates",
    ]

    todrop: list[Hashable] = []
    decoded = xr.Dataset()
    for container_name in containers:
        enc_geom_var = encoded[container_name]
        geom_attrs = enc_geom_var.attrs

        # Grab the coordinates attribute
        # NOTE(review): this updates the container's attrs dict in place. The cf_to_*
        # helpers read only `.attrs`, so they presumably rely on the encoding having
        # been merged here - confirm before replacing this with a copy.
        geom_attrs.update(enc_geom_var.encoding)

        geom_da = cf_to_shapely(encoded, container=container_name)
        geom_var = geom_da.variable

        linked_names = " ".join(
            geom_attrs.get(attr, "") for attr in linked_attrs
        ).split(" ")
        todrop.append(container_name)
        todrop.extend(s for s in linked_names if s)

        # `variable_name` is a cf-xarray specific attribute. Without a fallback, data
        # lacking it would be stored under the key `None`.
        name = geom_attrs.get("variable_name", None)
        if name is None:
            name = geom_da.name

        if name in encoded.dims:
            decoded = decoded.assign_coords(
                xr.Coordinates(coords={name: geom_var}, indexes={})
            )
        else:
            decoded[name] = geom_var

    decoded.update(encoded.drop_vars(todrop))

    # Is this a good idea? We are deleting information.
    # OTOH we have decoded it to a useful in-memory representation
    for var in decoded._variables.values():
        if var.attrs.get("geometry") in containers:
            var.attrs.pop("geometry")
    return decoded




[docs]
def encode_geometries(ds: xr.Dataset) -> xr.Dataset:
    """
    Encode any discovered geometry variables using the CF conventions.

    Practically speaking, geometry variables are numpy object arrays where the first
    element is a shapely geometry. Variables with a ``"geometry"`` dtype are treated
    as geometry variables too.

    Parameters
    ----------
    ds : Dataset
       Dataset containing at least one geometry variable.

    Returns
    -------
    Dataset
       Where all geometry variables are encoded. The information in a single geometry
       variable in the input is split across multiple variables in the returned Dataset
       following the CF conventions. ``ds`` itself is returned when it has no geometry
       variable.

    See Also
    --------
    shapely_to_cf
    cf_to_shapely
    """
    from shapely import (
        LineString,
        MultiLineString,
        MultiPoint,
        MultiPolygon,
        Point,
        Polygon,
    )

    SHAPELY_TYPES = (
        Point,
        LineString,
        Polygon,
        MultiPoint,
        MultiLineString,
        MultiPolygon,
    )

    geom_var_names = [
        name
        for name, var in ds._variables.items()
        if var.dtype == "geometry"
        or (
            var.dtype == "O"
            # An empty object array has no first element to inspect.
            and var.size > 0
            and isinstance(var.data.flat[0], SHAPELY_TYPES)
        )
    ]

    if not geom_var_names:
        return ds

    if to_drop := set(geom_var_names) & set(ds._indexes):
        # e.g. xvec GeometryIndex
        ds = ds.drop_indexes(to_drop)

    variables = {}
    for name in geom_var_names:
        # TODO: do we prefer this choice be invariant to number of geometry variables
        suffix = "_" + str(name) if len(geom_var_names) > 1 else ""
        container_name = GEOMETRY_CONTAINER_NAME + suffix
        # If `name` is a dimension name, then we need to drop it. Otherwise we don't
        # So set errors="ignore"
        variables.update(
            shapely_to_cf(ds[name], suffix=suffix)
            .drop_vars(name, errors="ignore")
            ._variables
        )

        geom_var = ds[name]
        more_updates = {}
        for varname, var in ds._variables.items():
            if varname == name:
                continue
            # TODO: this is incomplete. It works for vector data cubes where one of the geometry vars
            # is a dimension coordinate.
            if name in var.dims:
                # Shallow copy with private attrs so the input Dataset is not modified.
                var = var.copy(deep=False)
                var._attrs = copy.deepcopy(var._attrs)
                var.attrs["geometry"] = container_name
                # The grid_mapping and coordinates attributes can be carried by the geometry container
                # variable provided they are also carried by the data variables associated with the container.
                if to_add := geom_var.attrs.get("coordinates", ""):
                    # `coordinates` is a space-separated list of names: keep a
                    # separator between the existing names and the added ones.
                    existing = var.attrs.get("coordinates", "")
                    var.attrs["coordinates"] = (
                        f"{existing} {to_add}" if existing else to_add
                    )
            more_updates[varname] = var
        # NOTE(review): every pass re-reads the variables from `ds`, so with several
        # geometry variables this update replaces entries written by an earlier pass
        # (including a `geometry` attribute added there) and also carries over the
        # other, not yet encoded, geometry variables. Confirm whether this is intended.
        variables.update(more_updates)

        # WARNING: cf-xarray specific convention.
        # For vector data cubes, `name` is a dimension name.
        # By encoding to CF, we have
        # encoded the information in that variable across many different
        # variables (e.g. node_count) with `name` as a dimension.
        # We have to record `name` somewhere so that we reconstruct
        # a geometry variable of the right name at decode-time.
        variables[container_name].attrs["variable_name"] = name

    encoded = xr.Dataset(variables).set_coords(
        set(ds._coord_names) - set(geom_var_names)
    )

    return encoded



def reshape_unique_geometries(
    ds: xr.Dataset,
    geom_var: str = "geometry",
    new_dim: str = FEATURES_DIM_NAME,
) -> xr.Dataset:
    """Reshape a dataset containing a geometry variable so that all unique features are
    identified along a new dimension.

    This function only makes sense if the dimension of the geometry variable has no coordinate,
    or if that coordinate has repeated values for each geometry.

    Parameters
    ----------
    ds : xr.Dataset
        A Dataset.
    geom_var : string
        Name of the variable in `ds` that contains the geometry objects of type shapely.geometry.
        The variable must be 1D.
    new_dim : string
        Name of the new dimension in the returned object.

    Returns
    -------
    Dataset
        All variables sharing the dimension of `ds[geom_var]` are reshaped so that `new_dim`
        has a length equal to the number of unique geometries.

    Raises
    ------
    ValueError
        If `ds[geom_var]` is not 1D.
    """
    if ds[geom_var].ndim != 1:
        raise ValueError(
            f"The geometry variable must be 1D. Got ds[{geom_var}] with dims {ds[geom_var].dims}."
        )

    # Shapely objects are not hashable, thus np.unique cannot be used directly.
    # This trick is stolen from geopandas.
    _, unique_indexes, inv_indexes = np.unique(
        [g.wkb for g in ds[geom_var].values], return_index=True, return_inverse=True
    )
    old_name = ds[geom_var].dims[0]

    if old_name in ds.coords:
        old_values = ds[old_name].values
    else:
        # A dummy coord, a kind of counter, independent for each unique geometry:
        # element i gets the number of earlier elements holding the same geometry.
        # cumcount computes this in one pass instead of rescanning the prefix
        # of `inv_indexes` for every element.
        old_values = pd.Series(inv_indexes).groupby(inv_indexes).cumcount().to_numpy()

    multi_index = pd.MultiIndex.from_arrays(
        (inv_indexes, old_values), names=(new_dim, old_name)
    )
    temp_name = "__temp_multi_index__"
    out = ds.rename({old_name: temp_name})
    out[temp_name] = multi_index
    out = out.unstack(temp_name)

    # geom_var was reshaped also, reconstruct it from the unique values.
    unique_indexes_da = xr.DataArray(unique_indexes, dims=(new_dim,))
    out[geom_var] = ds[geom_var].isel({old_name: unique_indexes_da})
    if old_name not in ds.coords:
        # If there was no coord before, drop the dummy one we made.
        out = out.drop_vars(old_name)  # type: ignore[arg-type,unused-ignore]  # Hashable/str stuff
    return out



[docs]
def shapely_to_cf(
    geometries: xr.DataArray | Sequence,
    grid_mapping: str | None = None,
    *,
    suffix: str = "",
):
    """
    Convert a DataArray with shapely geometry objects into a CF-compliant dataset.

    Parameters
    ----------
    geometries : sequence of shapely geometries or xarray.DataArray
        A sequence of geometry objects or a DataArray storing such geometries.
        All geometries must be of the same base type : Point, Line or Polygon, but multipart geometries are accepted.
        The base type is taken from the first geometry.

    grid_mapping : str, optional
        A CF grid mapping name. When given, coordinates and attributes are named and set accordingly.
        Defaults to None, in which case the coordinates are simply names "crd_x" and "crd_y".
        It must not be passed together with a DataArray: in that case the grid mapping
        variable named by the ``grid_mapping`` attribute of ``geometries`` is looked up
        in its coordinates and its ``grid_mapping_name`` attribute is used.

    suffix : str, optional
        String appended to the names of the variables and dimensions of the encoded Dataset.

    Returns
    -------
    xr.Dataset
        A dataset with shapely geometry objects translated into CF-compliant variables :
         - 'x', 'y' : the node coordinates
         - 'crd_x', 'crd_y' : the feature coordinates (might have different names if `grid_mapping` is available).
         - 'node_count' : The number of nodes per feature. Always present for Lines and Polygons. For Points: only present if there are multipart geometries.
         - 'part_node_count' : The number of nodes per individual geometry. Only for Lines with multipart geometries and for Polygons with multipart geometries or holes.
         - 'interior_ring' : Integer boolean indicating whether rings are interior or exterior. Only for Polygons with holes.
         - 'geometry_container' : Empty variable with attributes describing the geometry type.

    Raises
    ------
    DeprecationWarning
        If ``grid_mapping`` is passed together with a DataArray.
    ValueError
        If the geometry type is not supported, or if the conversion raises
        ``NotImplementedError``.

    References
    ----------
    Please refer to the CF conventions document: http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#geometries
    """

    if isinstance(geometries, xr.DataArray) and grid_mapping is not None:
        # A single message string: with separate arguments the exception would
        # render as a tuple instead of a sentence.
        raise DeprecationWarning(
            "Explicitly passing `grid_mapping` with DataArray of geometries is deprecated. "
            "Please set a `grid_mapping` attribute on `geometries`, "
            "and set the grid mapping variable as a coordinate"
        )

    as_data = geometries.data if isinstance(geometries, xr.DataArray) else geometries
    type_ = as_data[0].geom_type

    grid_mapping_varname = None
    if (
        grid_mapping is None
        and isinstance(geometries, xr.DataArray)
        and (grid_mapping_varname := geometries.attrs.get("grid_mapping"))
    ):
        if grid_mapping_varname in geometries.coords:
            # Not all CRS can be encoded in CF
            grid_mapping = geometries.coords[grid_mapping_varname].attrs.get(
                "grid_mapping_name", None
            )

    # TODO: consider accepting a GeometryNames instance from the user instead
    names = GeometryNames(
        suffix=suffix, grid_mapping_name=grid_mapping, grid_mapping=grid_mapping_varname
    )

    try:
        if type_ in ["Point", "MultiPoint"]:
            ds = points_to_cf(geometries, names=names)
        elif type_ in ["LineString", "MultiLineString"]:
            ds = lines_to_cf(geometries, names=names)
        elif type_ in ["Polygon", "MultiPolygon"]:
            ds = polygons_to_cf(geometries, names=names)
        else:
            raise ValueError(
                f"This geometry type is not supported in CF-compliant datasets. Got {type_}"
            )
    except NotImplementedError as e:
        raise ValueError(
            "Error converting geometries. Possibly you have provided mixed geometry types."
        ) from e

    return ds




[docs]
def cf_to_shapely(ds: xr.Dataset, *, container: Hashable = GEOMETRY_CONTAINER_NAME):
    """
    Convert geometries stored in a CF-compliant way to shapely objects stored in a single variable.

    Parameters
    ----------
    ds : xr.Dataset
        Must contain a ``geometry_container`` variable with attributes giving the geometry specifications.
        Must contain all variables needed to reconstruct the geometries listed in these specifications.
    container : Hashable, optional
        Name of the geometry container variable to decode.

    Returns
    -------
    da: xr.DataArray
        A 1D DataArray of shapely objects, named ``"geometry"``.
        When the container has a ``grid_mapping`` attribute, it is copied to the result.

    Raises
    ------
    ValueError
        If ``container`` is not a variable of ``ds``, has no ``geometry_type``
        attribute, or has a ``geometry_type`` other than 'point', 'line' or 'polygon'.

    References
    ----------
    Please refer to the CF conventions document: http://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#geometries
    """
    if container not in ds._variables:
        raise ValueError(
            f"{container!r} is not the name of a variable in the provided Dataset."
        )

    geom_type = ds[container].attrs.get("geometry_type", None)
    if not geom_type:
        raise ValueError(
            f"{container!r} is not the name of a valid geometry variable. "
            "It does not have a `geometry_type` attribute."
        )

    # Extract all necessary geometry variables
    subds = ds.cf[[container]]

    # Pick the decoder matching the declared geometry type.
    decoders = (
        ("point", cf_to_points),
        ("line", cf_to_lines),
        ("polygon", cf_to_polygons),
    )
    for kind, decode in decoders:
        if geom_type == kind:
            geometries = decode(subds)
            break
    else:
        raise ValueError(
            f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}"
        )

    grid_mapping = ds[container].attrs.get("grid_mapping")
    if grid_mapping:
        geometries.attrs["grid_mapping"] = grid_mapping

    return geometries.rename("geometry")



def points_to_cf(
    pts: xr.DataArray | Sequence[Point | MultiPoint],
    *,
    names: GeometryNames | None = None,
):
    """Translate [multi]points (shapely.geometry.[Multi]Point) into a CF-compliant geometry dataset.

    Parameters
    ----------
    pts : sequence of shapely.geometry.Point or MultiPoint
        The sequence of [multi]points to translate to a CF dataset.
        A DataArray must be 1D.
    names : GeometryNames, optional
        Names and attributes to use for the encoded variables.
        A default ``GeometryNames()`` is used when omitted.

    Returns
    -------
    xr.Dataset
        A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container'.
        The coordinates of MultiPoint instances are their first point.
        'node_count' is left out when every feature has exactly one node.

    Raises
    ------
    ValueError
        If ``pts`` is a DataArray that is not 1D.
    """
    from shapely.geometry import MultiPoint

    if names is None:
        names = GeometryNames()

    pts_: Sequence[Point | MultiPoint]
    if not isinstance(pts, xr.DataArray):
        dim = FEATURES_DIM_NAME
        coord = None
        pts_ = pts
    else:
        # TODO: Fix this hardcoding
        if pts.ndim != 1:
            raise ValueError("Only 1D DataArrays are supported.")
        dim = pts.dims[0]
        coord = pts[dim] if dim in pts.coords else None
        pts_ = pts.values.tolist()

    node_x: list[np.ndarray] = []
    node_y: list[np.ndarray] = []
    counts: list[int] = []
    first_x: list[float] = []
    first_y: list[float] = []
    for geom in pts_:
        if isinstance(geom, MultiPoint):
            nodes = np.concatenate([part.coords for part in geom.geoms])
        else:
            nodes = np.atleast_2d(geom.coords)
        node_x.extend(nodes[:, 0])
        node_y.extend(nodes[:, 1])
        counts.append(nodes.shape[0])
        # The nominal coordinate of a feature is its first node.
        first_x.append(nodes[0, 0])
        first_y.append(nodes[0, 1])

    container = xr.DataArray(
        data=np.nan,
        attrs={"geometry_type": "point", **names.geometry_container_attrs},
    )
    ds = xr.Dataset(
        data_vars={
            names.node_count: xr.DataArray(counts, dims=(dim,)),
            names.container_name: container,
        },
        coords=names.coords(x=node_x, y=node_y, crdX=first_x, crdY=first_y, dim=dim),
    )

    if coord is not None:
        ds = ds.assign_coords({dim: coord})

    # Without any MultiPoint, node_count carries no information: remove it.
    only_single_points = (ds[names.node_count] == 1).data.all()
    if only_single_points:
        ds = ds.drop_vars(names.node_count)
        del ds[names.container_name].attrs["node_count"]
    return ds


def cf_to_points(ds: xr.Dataset):
    """Convert point geometries stored in a CF-compliant way to shapely points stored in a single variable.

    Parameters
    ----------
    ds : xr.Dataset
        A dataset with CF-compliant point geometries.
        Must have a *single* "geometry container" variable with at least a 'node_coordinates' attribute.
        Must also have the two 1D variables listed by this attribute.

    Returns
    -------
    geometry : xr.DataArray
        A 1D array of shapely.geometry.[Multi]Point objects.
        It has the same dimension as the ``node_count`` variable. Without ``node_count``,
        the dimension is the one of the coordinates variables, or ``'features'`` if
        those were not present in ``ds``.
    """
    from shapely.geometry import MultiPoint, Point

    container_name = _assert_single_geometry_container(ds)
    container_attrs = ds[container_name].attrs

    # Dimension of the nominal coordinates, when the container lists them.
    feat_dim = None
    if "coordinates" in container_attrs:
        xcoord_name, _ = container_attrs["coordinates"].split(" ")
        (feat_dim,) = ds[xcoord_name].dims

    x_name, y_name = container_attrs["node_coordinates"].split(" ")
    xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1)

    node_count_name = container_attrs.get("node_count")
    if node_count_name is not None:
        node_count = ds[node_count_name]
    else:
        # No node_count means all geometries are single points (node_count = 1)
        # And if we had no coordinates, then the dimension defaults to FEATURES_DIM_NAME
        feat_dim = feat_dim or FEATURES_DIM_NAME
        node_count = xr.DataArray([1] * xy.shape[0], dims=(feat_dim,))
        if feat_dim in ds.coords:
            node_count = node_count.assign_coords({feat_dim: ds[feat_dim]})

    geoms = np.empty(node_count.shape, dtype=object)
    start = 0  # Index of the first node of the current feature.
    for feature, size in enumerate(node_count.values):
        stop = start + size
        if size == 1:
            geoms[feature] = Point(xy[start, :])
        else:
            geoms[feature] = MultiPoint(xy[start:stop, :])
        start = stop

    da = xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords)
    if node_count_name:
        del da[node_count_name]
    return da


def lines_to_cf(lines: xr.DataArray | Sequence, *, names: GeometryNames | None = None):
    """Convert an iterable of lines (shapely.geometry.[Multi]Line) into a CF-compliant geometry dataset.

    Parameters
    ----------
    lines : sequence of shapely.geometry.Line or MultiLine
        The sequence of [multi]lines to translate to a CF dataset.
    names : GeometryNames, optional
        Names and attributes to use for the encoded variables.
        A default ``GeometryNames()`` is used when omitted.

    Returns
    -------
    xr.Dataset
        A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container'
        and optionally 'part_node_count'.
    """
    from shapely import to_ragged_array

    if names is None:
        names = GeometryNames()

    if isinstance(lines, xr.DataArray):
        dim = lines.dims[0]
        coord = lines[dim] if dim in lines.coords else None
        lines_ = lines.values
    else:
        dim, coord, lines_ = "index", None, np.array(lines)

    _, node_xy, offsets = to_ragged_array(lines_)

    part_offsets = offsets[0]
    part_node_count = np.diff(part_offsets)
    if len(offsets) == 1:
        # One level of offsets: every part is a feature of its own.
        feature_starts = part_offsets
        node_count = part_node_count
    else:
        # The second level of offsets groups the parts into features.
        feature_starts = np.take(part_offsets, offsets[1])
        node_count = np.diff(feature_starts)

    # The nominal coordinate of a feature is its first node.
    first_nodes = node_xy.take(feature_starts[:-1], 0)

    container = xr.DataArray(
        data=np.nan,
        attrs={"geometry_type": "line", **names.geometry_container_attrs},
    )
    ds = xr.Dataset(
        data_vars={
            names.node_count: xr.DataArray(node_count, dims=(dim,)),
            names.container_name: container,
        },
        coords=names.coords(
            x=node_xy[:, 0],
            y=node_xy[:, 1],
            crdX=first_nodes[:, 0],
            crdY=first_nodes[:, 1],
            dim=dim,
        ),
    )

    if coord is not None:
        ds = ds.assign_coords({dim: coord})

    # part_node_count is only stored when there are more parts than features.
    if len(part_node_count) != len(node_count):
        ds[names.part_node_count] = xr.DataArray(part_node_count, dims=names.part_dim)
        ds[names.container_name].attrs["part_node_count"] = names.part_node_count

    return ds


def cf_to_lines(ds: xr.Dataset):
    """Convert line geometries stored in a CF-compliant way to shapely lines stored in a single variable.

    Parameters
    ----------
    ds : xr.Dataset
        A dataset with CF-compliant line geometries.
        Must have a single geometry container variable with at least the
        'node_coordinates' and 'node_count' attributes.
        Must also have the two 1D variables listed by 'node_coordinates'.

    Returns
    -------
    geometry : xr.DataArray
        A 1D array of shapely.geometry.[Multi]Line objects.
        It has the same dimension as the ``node_count`` variable.

    Raises
    ------
    ValueError
        If the geometry container has no 'node_count' attribute.
    """
    from shapely import GeometryType, from_ragged_array

    container_name = _assert_single_geometry_container(ds)
    container_attrs = ds[container_name].attrs

    # Dimension of the nominal coordinates, when the container lists them.
    feat_dim = None
    if "coordinates" in container_attrs:
        xcoord_name, _ = container_attrs["coordinates"].split(" ")
        (feat_dim,) = ds[xcoord_name].dims

    x_name, y_name = container_attrs["node_coordinates"].split(" ")
    xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1)

    node_count_name = container_attrs.get("node_count")
    # Without part_node_count, every feature is made of a single part.
    part_node_count_name = container_attrs.get("part_node_count", node_count_name)
    if node_count_name is None:
        raise ValueError("'node_count' must be provided for line geometries")

    node_count = ds[node_count_name]
    feat_dim = feat_dim or "index"
    if feat_dim in ds.coords:
        node_count = node_count.assign_coords({feat_dim: ds[feat_dim]})

    # Build a LineString for every part: part_starts[i] is the first node of part i.
    part_node_count = ds[part_node_count_name]
    part_starts = np.insert(np.cumsum(part_node_count.values), 0, 0)
    lines = from_ragged_array(GeometryType.LINESTRING, xy, offsets=(part_starts,))

    # Positions in part_starts that are also the start of a feature.
    feature_starts = np.insert(np.cumsum(node_count), 0, 0)
    feature_parts = np.nonzero(np.isin(part_starts, feature_starts))[0]

    multilines = from_ragged_array(
        GeometryType.MULTILINESTRING, xy, offsets=(part_starts, feature_parts)
    )

    # Features made of one part become a LineString, the others a MultiLineString.
    is_single_part = np.diff(feature_parts) == 1
    geoms = np.where(is_single_part, lines[feature_parts[:-1]], multilines)

    decoded = xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords)
    return decoded.drop_vars(node_count_name)


def polygons_to_cf(
    polygons: xr.DataArray | Sequence, *, names: GeometryNames | None = None
):
    """Convert an iterable of polygons (shapely.geometry.[Multi]Polygon) into a CF-compliant geometry dataset.

    Parameters
    ----------
    polygons : sequence of shapely.geometry.Polygon or MultiPolygon
        The sequence of [multi]polygons to translate to a CF dataset.

    names: GeometryNames, optional
       Structure that helps manipulate geometry attrs.
       A default ``GeometryNames()`` is used when omitted.

    Returns
    -------
    xr.Dataset
        A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container'
        and optionally 'part_node_count' and 'interior_ring'.
    """
    from shapely import to_ragged_array

    if names is None:
        names = GeometryNames()

    if isinstance(polygons, xr.DataArray):
        dim = polygons.dims[0]
        coord = polygons[dim] if dim in polygons.coords else None
        polygons_ = polygons.values
    else:
        dim, coord, polygons_ = "index", None, np.array(polygons)

    _, node_xy, offsets = to_ragged_array(polygons_)

    ring_offsets = offsets[0]
    part_node_count = np.diff(ring_offsets)
    n_levels = len(offsets)
    if n_levels == 1:
        feature_starts = ring_offsets
        node_count = part_node_count
    elif n_levels >= 2:
        feature_starts = np.take(ring_offsets, offsets[1])
        # A ring that does not start a polygon is an interior ring.
        interior_ring = np.isin(ring_offsets, feature_starts, invert=True)[:-1]

        if n_levels == 3:
            # The third level of offsets groups the polygons into features.
            feature_starts = np.take(feature_starts, offsets[2])

        node_count = np.diff(feature_starts)

    # The nominal coordinate of a feature is its first node.
    first_nodes = node_xy.take(feature_starts[:-1], 0)

    data_vars = {names.node_count: (dim, node_count)}
    geometry_attrs = names.geometry_container_attrs

    # part_node_count is only stored when there are more rings than features.
    if len(part_node_count) != len(node_count):
        data_vars[names.part_node_count] = (names.part_dim, part_node_count)
        geometry_attrs["part_node_count"] = names.part_node_count

    # interior_ring is only stored when at least one ring is a hole.
    if interior_ring.any():
        data_vars[names.interior_ring] = (names.part_dim, interior_ring)
        geometry_attrs["interior_ring"] = names.interior_ring

    data_vars[names.container_name] = (  # type: ignore[assignment]
        (),
        np.nan,
        {"geometry_type": "polygon", **geometry_attrs},
    )
    ds = xr.Dataset(
        data_vars=data_vars,
        coords=names.coords(
            x=node_xy[:, 0],
            y=node_xy[:, 1],
            crdX=first_nodes[:, 0],
            crdY=first_nodes[:, 1],
            dim=dim,
        ),
    )

    if coord is not None:
        ds = ds.assign_coords({dim: coord})

    return ds


def cf_to_polygons(ds: xr.Dataset):
    """Convert polygon geometries stored in a CF-compliant way to shapely polygons stored in a single variable.

    Parameters
    ----------
    ds : xr.Dataset
        A dataset with CF-compliant polygon geometries.
        Must have a single geometry container variable with at least the
        'node_coordinates' and 'node_count' attributes.
        Must also have the two 1D variables listed by 'node_coordinates'.

    Returns
    -------
    geometry : xr.DataArray
        A 1D array of shapely.geometry.[Multi]Polygon objects.
        It has the same dimension as the ``node_count`` variable.

    Raises
    ------
    ValueError
        If the geometry container has no 'node_count' attribute, or if the first
        value of the 'interior_ring' variable is not 0.
    """
    from shapely import GeometryType, from_ragged_array

    container_name = _assert_single_geometry_container(ds)

    # Shorthand for convenience
    geo = ds[container_name].attrs

    # Dimension of the nominal coordinates, when the container lists them.
    # It is only used below to look up a matching coordinate in `ds`.
    feat_dim = None
    if "coordinates" in geo:
        xcoord_name, _ = geo["coordinates"].split(" ")
        (feat_dim,) = ds[xcoord_name].dims

    x_name, y_name = geo["node_coordinates"].split(" ")
    # One row per node, columns are (x, y).
    xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1)

    node_count_name = geo.get("node_count")
    # Without part_node_count, node_count is used as the per-part node count.
    part_node_count_name = geo.get("part_node_count", node_count_name)
    interior_ring_name = geo.get("interior_ring")

    if node_count_name is None:
        raise ValueError("'node_count' must be provided for polygon geometries")
    else:
        node_count = ds[node_count_name]
        feat_dim = feat_dim or "index"
        if feat_dim in ds.coords:
            node_count = node_count.assign_coords({feat_dim: ds[feat_dim]})

    # first get geometries for all the rings
    part_node_count = ds[part_node_count_name]
    # offset1[i] is the index of the first node of ring i (with the total appended).
    offset1 = np.insert(np.cumsum(part_node_count.values), 0, 0)

    # offset2 lists the rings that start a new polygon.
    if interior_ring_name is None:
        # No holes: every ring is a polygon of its own.
        offset2 = np.array(list(range(len(offset1))))
    else:
        interior_ring = ds[interior_ring_name]
        if not interior_ring[0] == 0:
            raise ValueError("coordinate array must start with an exterior ring")
        # Exterior rings (interior_ring == 0) start a polygon; the number of rings
        # closes the last one.
        offset2 = np.append(np.where(interior_ring == 0)[0], [len(part_node_count)])

    polygons = from_ragged_array(GeometryType.POLYGON, xy, offsets=(offset1, offset2))

    # get index of offset2 values that are edges for node_count
    # i.e. the polygons whose first ring starts on the first node of a feature.
    offset3 = np.nonzero(
        np.isin(
            offset2,
            np.nonzero(np.isin(offset1, np.insert(np.cumsum(node_count), 0, 0)))[0],
        )
    )[0]
    multipolygons = from_ragged_array(
        GeometryType.MULTIPOLYGON, xy, offsets=(offset1, offset2, offset3)
    )

    # get items from polygons or multipolygons depending on number of parts
    # A feature made of a single polygon becomes a Polygon, the others a MultiPolygon.
    geoms = np.where(np.diff(offset3) == 1, polygons[offset3[:-1]], multipolygons)

    # The result reuses the dimension and coordinates of node_count, minus node_count itself.
    return xr.DataArray(
        geoms, dims=node_count.dims, coords=node_count.coords
    ).drop_vars(node_count_name)