Skip to content

VoronoiRegionalizer

Bases: Regionalizer

VoronoiRegionalizer.

Voronoi [1] regionalizer allows the given geometries to be divided into Thiessen polygons using geometries that are the seeds. To minimize distortions tessellation will be performed on a sphere using SphericalVoronoi [2] function from scipy library.

References
  1. https://en.wikipedia.org/wiki/Voronoi_diagram
  2. https://docs.scipy.org/doc/scipy-1.9.2/reference/generated/scipy.spatial.SphericalVoronoi.html
Source code in srai/regionalizers/voronoi_regionalizer.py
class VoronoiRegionalizer(Regionalizer):
    """
    VoronoiRegionalizer.

    Voronoi [1] regionalizer allows the given geometries to be divided
    into Thiessen polygons using geometries that are the seeds. To
    minimize distortions tessellation will be performed on a sphere
    using SphericalVoronoi [2] function from scipy library.

    References:
        1. https://en.wikipedia.org/wiki/Voronoi_diagram
        2. https://docs.scipy.org/doc/scipy-1.9.2/reference/generated/scipy.spatial.SphericalVoronoi.html
    """  # noqa: W505, E501

    def __init__(
        self,
        seeds: gpd.GeoDataFrame,
        max_meters_between_points: int = 10_000,
        num_of_multiprocessing_workers: int = -1,
        multiprocessing_activation_threshold: Optional[int] = None,
    ) -> None:
        """
        Init VoronoiRegionalizer.

        All (multi)polygons from seeds GeoDataFrame will be transformed to their centroids,
        because scipy function requires only points as an input.

        Args:
            seeds (gpd.GeoDataFrame): GeoDataFrame with seeds for
                creating a tessellation. Minimum 4 seeds are required.
                Seeds cannot lie on a single arc. Empty seeds will be removed.
            max_meters_between_points (int): Maximal distance in meters between two points
                in a resulting polygon. Higher number results lower resolution of a polygon.
            num_of_multiprocessing_workers (int): Number of workers used for
                multiprocessing. Defaults to `-1` which results in a total number of available
                cpu threads. `0` and `1` values disable multiprocessing.
                Similar to `n_jobs` parameter from `scikit-learn` library.
            multiprocessing_activation_threshold (int, optional): Number of seeds required to start
                processing on multiple processes. Activating multiprocessing for a small
                amount of points might not be feasible. Defaults to 100.

        Raises:
            ValueError: If any seed is duplicated.
            ValueError: If less than 4 seeds are provided.
            ValueError: If provided seeds geodataframe has no crs defined.
        """
        import_optional_dependencies(
            dependency_group="voronoi", modules=["haversine", "pymap3d", "spherical_geometry"]
        )
        seeds_wgs84 = seeds.to_crs(crs=WGS84_CRS)
        self.region_ids: List[Hashable] = []
        self.seeds: List[Point] = []
        self.max_meters_between_points = max_meters_between_points
        self.num_of_multiprocessing_workers = num_of_multiprocessing_workers
        self.multiprocessing_activation_threshold = multiprocessing_activation_threshold
        for index, row in seeds_wgs84.iterrows():
            candidate_point = row.geometry.centroid
            if not candidate_point.is_empty:
                self.region_ids.append(index)
                self.seeds.append(candidate_point)

        if len(self.seeds) < 4:
            raise ValueError("Minimum 4 seeds are required.")

        duplicated_seeds_ids = self._get_duplicated_seeds_ids()
        if duplicated_seeds_ids:
            raise ValueError(f"Duplicate seeds present: {duplicated_seeds_ids}.")

    def transform(self, gdf: Optional[gpd.GeoDataFrame] = None) -> gpd.GeoDataFrame:
        """
        Regionalize a given GeoDataFrame.

        Returns a list of disjointed regions consisting of Thiessen cells generated
        using a Voronoi diagram on the sphere.

        Args:
            gdf (Optional[gpd.GeoDataFrame], optional): GeoDataFrame to be regionalized.
                Will use this list of geometries to crop resulting regions. If None, a boundary box
                with bounds (-180, -90, 180, 90) is used to return regions covering whole Earth.
                Defaults to None.

        Returns:
            gpd.GeoDataFrame: GeoDataFrame with the regionalized data cropped using input
                GeoDataFrame.

        Raises:
            ValueError: If provided geodataframe has no crs defined.
            ValueError: If seeds are laying on a single arc.
        """
        from ._spherical_voronoi import generate_voronoi_regions

        if gdf is None:
            gdf = gpd.GeoDataFrame(
                {GEOMETRY_COLUMN: [box(minx=-180, maxx=180, miny=-90, maxy=90)]}, crs=WGS84_CRS
            )

        gdf_wgs84 = gdf.to_crs(crs=WGS84_CRS)
        generated_regions = generate_voronoi_regions(
            seeds=self.seeds,
            max_meters_between_points=self.max_meters_between_points,
            num_of_multiprocessing_workers=self.num_of_multiprocessing_workers,
            multiprocessing_activation_threshold=self.multiprocessing_activation_threshold,
        )
        regions_gdf = gpd.GeoDataFrame(
            data={GEOMETRY_COLUMN: generated_regions}, index=self.region_ids, crs=WGS84_CRS
        )
        regions_gdf.index.rename(REGIONS_INDEX, inplace=True)
        clipped_regions_gdf = regions_gdf.clip(mask=gdf_wgs84, keep_geom_type=False)
        return clipped_regions_gdf

    def _get_duplicated_seeds_ids(self) -> List[Hashable]:
        """Return all seeds ids that overlap with another using quick sjoin operation."""
        gdf = gpd.GeoDataFrame(
            data={GEOMETRY_COLUMN: self.seeds}, index=self.region_ids, crs=WGS84_CRS
        )
        duplicated_seeds = gdf.sjoin(gdf).index.value_counts().loc[lambda x: x > 1]
        duplicated_seeds_ids: List[Hashable] = duplicated_seeds.index.to_list()
        return duplicated_seeds_ids

__init__

__init__(seeds: gpd.GeoDataFrame, max_meters_between_points: int = 10000, num_of_multiprocessing_workers: int = -1, multiprocessing_activation_threshold: Optional[int] = None) -> None

Init VoronoiRegionalizer.

All (multi)polygons from seeds GeoDataFrame will be transformed to their centroids, because scipy function requires only points as an input.

PARAMETER DESCRIPTION
seeds

GeoDataFrame with seeds for creating a tessellation. Minimum 4 seeds are required. Seeds cannot lie on a single arc. Empty seeds will be removed.

TYPE: gpd.GeoDataFrame

max_meters_between_points

Maximal distance in meters between two points in a resulting polygon. Higher number results lower resolution of a polygon.

TYPE: int DEFAULT: 10000

num_of_multiprocessing_workers

Number of workers used for multiprocessing. Defaults to -1 which results in a total number of available cpu threads. 0 and 1 values disable multiprocessing. Similar to n_jobs parameter from scikit-learn library.

TYPE: int DEFAULT: -1

multiprocessing_activation_threshold

Number of seeds required to start processing on multiple processes. Activating multiprocessing for a small amount of points might not be feasible. Defaults to 100.

TYPE: int DEFAULT: None

RAISES DESCRIPTION
ValueError

If any seed is duplicated.

ValueError

If less than 4 seeds are provided.

ValueError

If provided seeds geodataframe has no crs defined.

Source code in srai/regionalizers/voronoi_regionalizer.py
def __init__(
    self,
    seeds: gpd.GeoDataFrame,
    max_meters_between_points: int = 10_000,
    num_of_multiprocessing_workers: int = -1,
    multiprocessing_activation_threshold: Optional[int] = None,
) -> None:
    """
    Init VoronoiRegionalizer.

    All (multi)polygons from seeds GeoDataFrame will be transformed to their centroids,
    because scipy function requires only points as an input.

    Args:
        seeds (gpd.GeoDataFrame): GeoDataFrame with seeds for
            creating a tessellation. Minimum 4 seeds are required.
            Seeds cannot lie on a single arc. Empty seeds will be removed.
        max_meters_between_points (int): Maximal distance in meters between two points
            in a resulting polygon. Higher number results lower resolution of a polygon.
        num_of_multiprocessing_workers (int): Number of workers used for
            multiprocessing. Defaults to `-1` which results in a total number of available
            cpu threads. `0` and `1` values disable multiprocessing.
            Similar to `n_jobs` parameter from `scikit-learn` library.
        multiprocessing_activation_threshold (int, optional): Number of seeds required to start
            processing on multiple processes. Activating multiprocessing for a small
            amount of points might not be feasible. Defaults to 100.

    Raises:
        ValueError: If any seed is duplicated.
        ValueError: If less than 4 seeds are provided.
        ValueError: If provided seeds geodataframe has no crs defined.
    """
    import_optional_dependencies(
        dependency_group="voronoi", modules=["haversine", "pymap3d", "spherical_geometry"]
    )
    seeds_wgs84 = seeds.to_crs(crs=WGS84_CRS)
    self.region_ids: List[Hashable] = []
    self.seeds: List[Point] = []
    self.max_meters_between_points = max_meters_between_points
    self.num_of_multiprocessing_workers = num_of_multiprocessing_workers
    self.multiprocessing_activation_threshold = multiprocessing_activation_threshold
    for index, row in seeds_wgs84.iterrows():
        candidate_point = row.geometry.centroid
        if not candidate_point.is_empty:
            self.region_ids.append(index)
            self.seeds.append(candidate_point)

    if len(self.seeds) < 4:
        raise ValueError("Minimum 4 seeds are required.")

    duplicated_seeds_ids = self._get_duplicated_seeds_ids()
    if duplicated_seeds_ids:
        raise ValueError(f"Duplicate seeds present: {duplicated_seeds_ids}.")

transform

transform(gdf: Optional[gpd.GeoDataFrame] = None) -> gpd.GeoDataFrame

Regionalize a given GeoDataFrame.

Returns a list of disjointed regions consisting of Thiessen cells generated using a Voronoi diagram on the sphere.

PARAMETER DESCRIPTION
gdf

GeoDataFrame to be regionalized. Will use this list of geometries to crop resulting regions. If None, a boundary box with bounds (-180, -90, 180, 90) is used to return regions covering whole Earth. Defaults to None.

TYPE: Optional[gpd.GeoDataFrame] DEFAULT: None

RETURNS DESCRIPTION
gpd.GeoDataFrame

gpd.GeoDataFrame: GeoDataFrame with the regionalized data cropped using input GeoDataFrame.

RAISES DESCRIPTION
ValueError

If provided geodataframe has no crs defined.

ValueError

If seeds are laying on a single arc.

Source code in srai/regionalizers/voronoi_regionalizer.py
def transform(self, gdf: Optional[gpd.GeoDataFrame] = None) -> gpd.GeoDataFrame:
    """
    Regionalize a given GeoDataFrame.

    Returns a list of disjointed regions consisting of Thiessen cells generated
    using a Voronoi diagram on the sphere.

    Args:
        gdf (Optional[gpd.GeoDataFrame], optional): GeoDataFrame to be regionalized.
            Will use this list of geometries to crop resulting regions. If None, a boundary box
            with bounds (-180, -90, 180, 90) is used to return regions covering whole Earth.
            Defaults to None.

    Returns:
        gpd.GeoDataFrame: GeoDataFrame with the regionalized data cropped using input
            GeoDataFrame.

    Raises:
        ValueError: If provided geodataframe has no crs defined.
        ValueError: If seeds are laying on a single arc.
    """
    from ._spherical_voronoi import generate_voronoi_regions

    if gdf is None:
        gdf = gpd.GeoDataFrame(
            {GEOMETRY_COLUMN: [box(minx=-180, maxx=180, miny=-90, maxy=90)]}, crs=WGS84_CRS
        )

    gdf_wgs84 = gdf.to_crs(crs=WGS84_CRS)
    generated_regions = generate_voronoi_regions(
        seeds=self.seeds,
        max_meters_between_points=self.max_meters_between_points,
        num_of_multiprocessing_workers=self.num_of_multiprocessing_workers,
        multiprocessing_activation_threshold=self.multiprocessing_activation_threshold,
    )
    regions_gdf = gpd.GeoDataFrame(
        data={GEOMETRY_COLUMN: generated_regions}, index=self.region_ids, crs=WGS84_CRS
    )
    regions_gdf.index.rename(REGIONS_INDEX, inplace=True)
    clipped_regions_gdf = regions_gdf.clip(mask=gdf_wgs84, keep_geom_type=False)
    return clipped_regions_gdf