GeoparquetLoader

Bases: Loader

GeoparquetLoader.

The Geoparquet [1] loader is a wrapper around the geopandas.read_parquet function that additionally supports automatic index setting and optional geometry clipping.

References

https://github.com/opengeospatial/geoparquet

Source code in srai/loaders/geoparquet_loader.py

class GeoparquetLoader(Loader):
    """
    GeoparquetLoader.

    Geoparquet [1] loader is a wrapper around the `geopandas.read_parquet` function
    that additionally supports automatic index setting and optional geometry clipping.

    References:
        1. https://github.com/opengeospatial/geoparquet
    """

    def load(
        self,
        file_path: Union[Path, str],
        index_column: Optional[str] = None,
        columns: Optional[List[str]] = None,
        area: Optional[gpd.GeoDataFrame] = None,
    ) -> gpd.GeoDataFrame:
        """
        Load a geoparquet file.

        The loaded data is reprojected to `WGS84_CRS` and, when `area` is given,
        clipped to that area (which is reprojected to `WGS84_CRS` first).

        Args:
            file_path (Union[Path, str]): parquet file path.
            index_column (str, optional): Column that will be used as an index.
                If not provided, automatic indexing will be applied by default. Defaults to None.
            columns (List[str], optional): List of columns to load.
                If not provided, all will be loaded. The geometry column is always
                loaded, even when it is missing from this list. The passed list
                is never modified. Defaults to None.
            area (gpd.GeoDataFrame, optional): Mask to clip loaded data.
                If not provided, unaltered data will be returned. Defaults to None.

        Raises:
            ValueError: If provided index column doesn't exist in list of loaded columns.

        Returns:
            gpd.GeoDataFrame: Loaded geoparquet file as a GeoDataFrame.
        """
        if columns and GEOMETRY_COLUMN not in columns:
            # Build a new list instead of appending in place, so the list
            # object owned by the caller is left untouched.
            columns = [*columns, GEOMETRY_COLUMN]

        gdf = gpd.read_parquet(path=file_path, columns=columns)

        if index_column:
            if index_column not in gdf.columns:
                raise ValueError(f"Column {index_column} doesn't exist in a file.")
            gdf.set_index(index_column, inplace=True)

        gdf.to_crs(crs=WGS84_CRS, inplace=True)

        if area is not None:
            # The mask is reprojected as well so both sides of the clip share a CRS.
            area_wgs84 = area.to_crs(crs=WGS84_CRS)
            gdf = gdf.clip(mask=area_wgs84, keep_geom_type=False)

        return gdf

load

load(file_path: Union[Path, str], index_column: Optional[str] = None, columns: Optional[List[str]] = None, area: Optional[gpd.GeoDataFrame] = None) -> gpd.GeoDataFrame

Load a geoparquet file.

PARAMETER	DESCRIPTION
`file_path`	parquet file path. TYPE: `Union[Path, str]`
`index_column`	Column that will be used as an index. If not provided, automatic indexing will be applied by default. Defaults to None. TYPE: `str` DEFAULT: `None`
`columns`	List of columns to load. If not provided, all will be loaded. Defaults to None. TYPE: `List[str]` DEFAULT: `None`
`area`	Mask to clip loaded data. If not provided, unaltered data will be returned. Defaults to None. TYPE: `gpd.GeoDataFrame` DEFAULT: `None`

RAISES	DESCRIPTION
`ValueError`	If provided index column doesn't exist in list of loaded columns.

RETURNS	DESCRIPTION
`gpd.GeoDataFrame`	Loaded geoparquet file as a GeoDataFrame.

Source code in srai/loaders/geoparquet_loader.py

def load(
    self,
    file_path: Union[Path, str],
    index_column: Optional[str] = None,
    columns: Optional[List[str]] = None,
    area: Optional[gpd.GeoDataFrame] = None,
) -> gpd.GeoDataFrame:
    """
    Load a geoparquet file.

    The loaded data is reprojected to `WGS84_CRS` and, when `area` is given,
    clipped to that area (which is reprojected to `WGS84_CRS` first).

    Args:
        file_path (Union[Path, str]): parquet file path.
        index_column (str, optional): Column that will be used as an index.
            If not provided, automatic indexing will be applied by default. Defaults to None.
        columns (List[str], optional): List of columns to load.
            If not provided, all will be loaded. The geometry column is always
            loaded, even when it is missing from this list. The passed list
            is never modified. Defaults to None.
        area (gpd.GeoDataFrame, optional): Mask to clip loaded data.
            If not provided, unaltered data will be returned. Defaults to None.

    Raises:
        ValueError: If provided index column doesn't exist in list of loaded columns.

    Returns:
        gpd.GeoDataFrame: Loaded geoparquet file as a GeoDataFrame.
    """
    if columns and GEOMETRY_COLUMN not in columns:
        # Build a new list instead of appending in place, so the list
        # object owned by the caller is left untouched.
        columns = [*columns, GEOMETRY_COLUMN]

    gdf = gpd.read_parquet(path=file_path, columns=columns)

    if index_column:
        if index_column not in gdf.columns:
            raise ValueError(f"Column {index_column} doesn't exist in a file.")
        gdf.set_index(index_column, inplace=True)

    gdf.to_crs(crs=WGS84_CRS, inplace=True)

    if area is not None:
        # The mask is reprojected as well so both sides of the clip share a CRS.
        area_wgs84 = area.to_crs(crs=WGS84_CRS)
        gdf = gdf.clip(mask=area_wgs84, keep_geom_type=False)

    return gdf