Bases: Loader
  
      GeoparquetLoader.
Geoparquet [1] loader is a wrapper for a geopandas.read_parquet function
and allows for an automatic index setting and additional geometry clipping.
  References
  
1. https://github.com/opengeospatial/geoparquet
 
            
              Source code in srai/loaders/geoparquet_loader.py
              |  | class GeoparquetLoader(Loader):
    """
    GeoparquetLoader.
    Geoparquet [1] loader is a wrapper for a `geopandas.read_parquet` function
    and allows for an automatic index setting and additional geometry clipping.
    References:
        1. https://github.com/opengeospatial/geoparquet
    """
    def load(
        self,
        file_path: Union[Path, str],
        index_column: Optional[str] = None,
        columns: Optional[List[str]] = None,
        area: Optional[gpd.GeoDataFrame] = None,
    ) -> gpd.GeoDataFrame:
        """
        Load a geoparquet file.

        The file is read with `geopandas.read_parquet`, optionally re-indexed,
        reprojected to `WGS84_CRS` and optionally clipped to a given area.

        Args:
            file_path (Union[Path, str]): parquet file path.
            index_column (str, optional): Column that will be used as an index.
                If not provided, automatic indexing will be applied by default. Defaults to None.
            columns (List[str], optional): List of columns to load.
                If not provided, all will be loaded. The geometry column is always
                loaded, even when it is not listed. The passed list is never modified.
                Defaults to None.
            area (gpd.GeoDataFrame, optional): Mask to clip loaded data.
                If not provided, unaltered data will be returned. Defaults to None.

        Raises:
            ValueError: If provided index column doesn't exist in list of loaded columns.

        Returns:
            gpd.GeoDataFrame: Loaded geoparquet file as a GeoDataFrame.
        """
        if columns and GEOMETRY_COLUMN not in columns:
            # Build a new list instead of appending in place, so the caller's
            # list is left untouched.
            columns = [*columns, GEOMETRY_COLUMN]

        gdf = gpd.read_parquet(path=file_path, columns=columns)

        if index_column:
            if index_column not in gdf.columns:
                raise ValueError(f"Column {index_column} doesn't exist in a file.")
            gdf = gdf.set_index(index_column)

        gdf = gdf.to_crs(crs=WGS84_CRS)

        if area is not None:
            # The mask is reprojected to the same CRS as the data before clipping.
            area_wgs84 = area.to_crs(crs=WGS84_CRS)
            gdf = gdf.clip(mask=area_wgs84, keep_geom_type=False)

        return gdf
 | 
 
  
  
          load
load(file_path: Union[Path, str], index_column: Optional[str] = None, columns: Optional[List[str]] = None, area: Optional[gpd.GeoDataFrame] = None) -> gpd.GeoDataFrame
  
      Load a geoparquet file.
  
    
      
        | PARAMETER | DESCRIPTION | 
    
    
        
          | file_path | 
                
                  TYPE:
                    Union[Path, str] | 
        
          | index_column | 
              Column that will be used as an index.
If not provided, automatic indexing will be applied by default. Defaults to None. 
                
                  TYPE:
                    str DEFAULT: None | 
        
          | columns | 
              List of columns to load.
If not provided, all will be loaded. Defaults to None. 
                
                  TYPE:
                    List[str] DEFAULT: None | 
        
          | area | 
              Mask to clip loaded data.
If not provided, unaltered data will be returned. Defaults to None. 
                
                  TYPE:
                    gpd.GeoDataFrame DEFAULT: None | 
    
  
  
    
      
        | RAISES | DESCRIPTION | 
    
    
        
          | ValueError | 
              If provided index column doesn't exist in the list of loaded columns. | 
    
  
  
    
      
        | RETURNS | DESCRIPTION | 
    
    
        
          | gpd.GeoDataFrame | 
              gpd.GeoDataFrame: Loaded geoparquet file as a GeoDataFrame. | 
    
  
          
            Source code in srai/loaders/geoparquet_loader.py
            |  | def load(
    self,
    file_path: Union[Path, str],
    index_column: Optional[str] = None,
    columns: Optional[List[str]] = None,
    area: Optional[gpd.GeoDataFrame] = None,
) -> gpd.GeoDataFrame:
    """
    Load a geoparquet file.
    Args:
        file_path (Union[Path, str]): parquet file path.
        index_column (str, optional): Column that will be used as an index.
            If not provided, automatic indexing will be applied by default. Defaults to None.
        columns (List[str], optional): List of columns to load.
            If not provided, all will be loaded. Defaults to None.
        area (gpd.GeoDataFrame, optional): Mask to clip loaded data.
            If not provided, unaltered data will be returned. Defaults to None.
    Raises:
        ValueError: If provided index column doesn't exists in list of loaded columns.
    Returns:
        gpd.GeoDataFrame: Loaded geoparquet file as a GeoDataFrame.
    """
    if columns and GEOMETRY_COLUMN not in columns:
        columns.append(GEOMETRY_COLUMN)
    gdf = gpd.read_parquet(path=file_path, columns=columns)
    if index_column:
        if index_column not in gdf.columns:
            raise ValueError(f"Column {index_column} doesn't exist in a file.")
        gdf.set_index(index_column, inplace=True)
    gdf.to_crs(crs=WGS84_CRS, inplace=True)
    if area is not None:
        area_wgs84 = area.to_crs(crs=WGS84_CRS)
        gdf = gdf.clip(mask=area_wgs84, keep_geom_type=False)
    return gdf
 |