Skip to content

Index

This module contains loaders, used to load spatial data from different sources.

We want to unify loading from different data sources into a single interface. Thanks to this, we have a unified spatial data format, which makes it possible to feed them into any of the embedding methods available in this library.

Loader

Bases: ABC

Abstract class for loaders.

load(*args, **kwargs)

abstractmethod

Load data for a given area.

PARAMETER DESCRIPTION
*args

Positional arguments depending on a specific loader.

TYPE: Any DEFAULT: ()

**kwargs

Keyword arguments depending on a specific loader.

TYPE: Any DEFAULT: {}

RETURNS DESCRIPTION
GeoDataFrame

GeoDataFrame with the downloaded data.

Source code in srai/loaders/_base.py
@abc.abstractmethod
def load(self, *args: Any, **kwargs: Any) -> gpd.GeoDataFrame:  # pragma: no cover
    """
    Load data for a given area.

    This is the abstract entry point of the loader interface; this base
    implementation only raises `NotImplementedError`.

    Args:
        *args: Positional arguments depending on a specific loader.
        **kwargs: Keyword arguments depending on a specific loader.

    Returns:
        GeoDataFrame with the downloaded data.

    Raises:
        NotImplementedError: Always, when this base implementation is called.
    """
    raise NotImplementedError

GeoparquetLoader

Bases: Loader

GeoparquetLoader.

Geoparquet [1] loader is a wrapper for a geopandas.read_parquet function and allows for an automatic index setting and additional geometry clipping.

References
  1. https://github.com/opengeospatial/geoparquet

load(file_path, index_column=None, columns=None, area=None)

Load a geoparquet file.

PARAMETER DESCRIPTION
file_path

parquet file path.

TYPE: Union[Path, str]

index_column

Column that will be used as an index. If not provided, automatic indexing will be applied by default. Defaults to None.

TYPE: str DEFAULT: None

columns

List of columns to load. If not provided, all will be loaded. Defaults to None.

TYPE: List[str] DEFAULT: None

area

Mask to clip loaded data. If not provided, unaltered data will be returned. Defaults to None.

TYPE: GeoDataFrame DEFAULT: None

RAISES DESCRIPTION
ValueError

If the provided index column doesn't exist in the list of loaded columns.

RETURNS DESCRIPTION
GeoDataFrame

gpd.GeoDataFrame: Loaded geoparquet file as a GeoDataFrame.

Source code in srai/loaders/geoparquet_loader.py
def load(
    self,
    file_path: Union[Path, str],
    index_column: Optional[str] = None,
    columns: Optional[list[str]] = None,
    area: Optional[gpd.GeoDataFrame] = None,
) -> gpd.GeoDataFrame:
    """
    Load a geoparquet file.

    The loaded data is reprojected to `WGS84_CRS` and, when `area` is given,
    clipped to it.

    Args:
        file_path (Union[Path, str]): parquet file path.
        index_column (str, optional): Column that will be used as an index.
            If not provided, automatic indexing will be applied by default. Defaults to None.
        columns (List[str], optional): List of columns to load. The geometry column
            is always added to the request when missing; the passed list itself
            is left unmodified. If not provided, all will be loaded. Defaults to None.
        area (gpd.GeoDataFrame, optional): Mask to clip loaded data. It is reprojected
            to `WGS84_CRS` before clipping.
            If not provided, unaltered data will be returned. Defaults to None.

    Raises:
        ValueError: If the provided index column doesn't exist in the list of loaded columns.

    Returns:
        gpd.GeoDataFrame: Loaded geoparquet file as a GeoDataFrame.
    """
    if columns and GEOMETRY_COLUMN not in columns:
        # Build a new list instead of appending in place, so the list object
        # owned by the caller is never mutated as a side effect of loading.
        columns = [*columns, GEOMETRY_COLUMN]

    gdf = gpd.read_parquet(path=file_path, columns=columns)

    if index_column:
        if index_column not in gdf.columns:
            raise ValueError(f"Column {index_column} doesn't exist in a file.")
        gdf.set_index(index_column, inplace=True)

    gdf.to_crs(crs=WGS84_CRS, inplace=True)

    if area is not None:
        # The mask has to share the CRS of the data before clipping.
        area_wgs84 = area.to_crs(crs=WGS84_CRS)
        gdf = gdf.clip(mask=area_wgs84, keep_geom_type=False)

    return gdf

GTFSLoader()

Bases: Loader

GTFSLoader.

This loader is capable of reading GTFS feed and calculates time aggregations in 1H slots.

Source code in srai/loaders/gtfs_loader.py
def __init__(self) -> None:
    """
    Initialize GTFS loader.

    Runs the optional-dependency check for the `gtfs` group (`gtfs_kit` module)
    and sets the time resolution used by the loader.
    """
    # Must run first: `gtfs_kit` is imported lazily inside `load`.
    import_optional_dependencies(dependency_group="gtfs", modules=["gtfs_kit"])

    # NOTE(review): looks like a pandas frequency alias for one-hour slots - confirm
    # where it is consumed (not visible in this block).
    self.time_resolution = "1H"

load(
    gtfs_file,
    fail_on_validation_errors=True,
    skip_validation=False,
)

Load GTFS feed and calculate time aggregations for stops.

PARAMETER DESCRIPTION
gtfs_file

Path to the GTFS feed.

TYPE: Path

fail_on_validation_errors

Fail if GTFS feed is invalid. Ignored when skip_validation is True.

TYPE: bool DEFAULT: True

skip_validation

Skip GTFS feed validation.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
GeoDataFrame

gpd.GeoDataFrame: GeoDataFrame with trip counts and list of directions for stops.

Source code in srai/loaders/gtfs_loader.py
def load(
    self,
    gtfs_file: Path,
    fail_on_validation_errors: bool = True,
    skip_validation: bool = False,
) -> gpd.GeoDataFrame:
    """
    Read a GTFS feed and build a GeoDataFrame of stops with time aggregations.

    The feed is read with `gtfs_kit`, optionally validated, and then the trips
    data is joined with stop point geometries (inner join on `stop_id`) and with
    the directions data (left join on `stop_id`).

    Args:
        gtfs_file (Path): Path to the GTFS feed.
        fail_on_validation_errors (bool): Fail if GTFS feed is invalid. Ignored when
            skip_validation is True.
        skip_validation (bool): Skip GTFS feed validation.

    Returns:
        gpd.GeoDataFrame: GeoDataFrame with trip counts and list of directions for stops.
    """
    import gtfs_kit as gk

    gtfs_feed = gk.read_feed(gtfs_file, dist_units="km")

    if not skip_validation:
        self._validate_feed(gtfs_feed, fail=fail_on_validation_errors)

    trips = self._load_trips(gtfs_feed)
    directions = self._load_directions(gtfs_feed)

    # One point per stop, built from the feed's lon/lat columns.
    stops = gtfs_feed.stops[["stop_id", "stop_lat", "stop_lon"]].set_index("stop_id")
    stops[GEOMETRY_COLUMN] = stops.apply(
        lambda stop: Point([stop["stop_lon"], stop["stop_lat"]]), axis=1
    )

    # Inner join: only stops present in the trips data get a geometry and are kept.
    trips_with_geometry = trips.merge(stops[GEOMETRY_COLUMN], how="inner", on="stop_id")
    stops_gdf = gpd.GeoDataFrame(
        trips_with_geometry,
        geometry=GEOMETRY_COLUMN,
        crs=WGS84_CRS,
    )

    # Left join: stops without directions data are preserved.
    stops_gdf = stops_gdf.merge(directions, how="left", on="stop_id")
    stops_gdf.index.name = FEATURES_INDEX

    return stops_gdf

OSMLoader

Bases: Loader, ABC

Abstract class for loaders.

load(area, tags)

abstractmethod

Load data for a given area.

PARAMETER DESCRIPTION
area

Shapely geometry with the area of interest.

TYPE: Union[BaseGeometry, Iterable[BaseGeometry], GeoSeries, GeoDataFrame]

tags

OSM tags filter.

TYPE: Union[OsmTagsFilter, GroupedOsmTagsFilter]

RETURNS DESCRIPTION
GeoDataFrame

gpd.GeoDataFrame: GeoDataFrame with the downloaded data.

Source code in srai/loaders/osm_loaders/_base.py
@abc.abstractmethod
def load(
    self,
    area: Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame],
    tags: Union[OsmTagsFilter, GroupedOsmTagsFilter],
) -> gpd.GeoDataFrame:  # pragma: no cover
    """
    Load data for a given area.

    This is the abstract entry point of the OSM loader interface; this base
    implementation only raises `NotImplementedError`.

    Args:
        area (Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame]):
            Area of interest, given as a single geometry, an iterable of geometries,
            a GeoSeries or a GeoDataFrame.
        tags (Union[OsmTagsFilter, GroupedOsmTagsFilter]): OSM tags filter.

    Returns:
        gpd.GeoDataFrame: GeoDataFrame with the downloaded data.

    Raises:
        NotImplementedError: Always, when this base implementation is called.
    """
    raise NotImplementedError

OSMOnlineLoader()

Bases: OSMLoader

OSMOnlineLoader.

OSM(OpenStreetMap)[1] online loader is a loader capable of downloading objects from a given area from OSM. It filters features based on OSM tags[2] in form of key:value pairs, that are used by OSM users to give meaning to geometries.

This loader is a wrapper around the osmnx library. It uses osmnx.features_from_polygon (or osmnx.geometries_from_polygon for osmnx versions older than 1.5.0) to make individual queries.

References
  1. https://www.openstreetmap.org/
  2. https://wiki.openstreetmap.org/wiki/Tags
Source code in srai/loaders/osm_loaders/osm_online_loader.py
def __init__(self) -> None:
    """
    Initialize OSMOnlineLoader.

    Runs the optional-dependency check for the `osm` group (`osmnx` module).
    """
    import_optional_dependencies(dependency_group="osm", modules=["osmnx"])

load(area, tags)

Download OSM features with specified tags for a given area.

The loader first downloads all objects with tags. It returns a GeoDataFrame containing the geometry column and columns for tag keys.

Note: Some key/value pairs might be missing from the resulting GeoDataFrame, simply because there are no such objects in the given area.

PARAMETER DESCRIPTION
area

Area for which to download objects.

TYPE: Union[BaseGeometry, Iterable[BaseGeometry], GeoSeries, GeoDataFrame]

tags

A dictionary specifying which tags to download. The keys should be OSM tags (e.g. building, amenity). The values should either be True for retrieving all objects with the tag, string for retrieving a single tag-value pair or list of strings for retrieving all values specified in the list. tags={'leisure': 'park'} would return parks from the area. tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']} would return parks, all amenity types, bakeries and bicycle shops.

TYPE: Union[OsmTagsFilter, GroupedOsmTagsFilter]

RETURNS DESCRIPTION
GeoDataFrame

gpd.GeoDataFrame: Downloaded features as a GeoDataFrame.

Source code in srai/loaders/osm_loaders/osm_online_loader.py
def load(
    self,
    area: Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame],
    tags: Union[OsmTagsFilter, GroupedOsmTagsFilter],
) -> gpd.GeoDataFrame:
    """
    Download OSM features with specified tags for a given area.

    One query is made per (area geometry, tag key/value) pair. Non-empty results
    are grouped into a single GeoDataFrame containing the `geometry` column and
    columns for tag keys, which is then parsed into groups according to `tags`.

    Note: Some key/value pairs might be missing from the resulting GeoDataFrame,
        simply because there are no such objects in the given area.

    Args:
        area (Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame]):
            Area for which to download objects.
        tags (Union[OsmTagsFilter, GroupedOsmTagsFilter]): A dictionary
            specifying which tags to download.
            The keys should be OSM tags (e.g. `building`, `amenity`).
            The values should either be `True` for retrieving all objects with the tag,
            string for retrieving a single tag-value pair
            or list of strings for retrieving all values specified in the list.
            `tags={'leisure': 'park'}` would return parks from the area.
            `tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']}`
            would return parks, all amenity types, bakeries and bicycle shops.

    Returns:
        gpd.GeoDataFrame: Downloaded features as a GeoDataFrame.
    """
    import osmnx as ox

    area_wgs84 = self._prepare_area_gdf(area)
    flat_tags = self._flatten_tags(merge_osm_tags_filter(tags))

    queries_count = len(area_wgs84) * len(flat_tags)

    # Fixed description width for the progress bar: the longest key/value name
    # plus the static part of the description template.
    longest_name_len = self._get_max_key_value_name_len(flat_tags)
    pbar_desc_len = longest_name_len + len(self._PBAR_FORMAT.format("", ""))

    downloaded_gdfs = []

    # Pick the download function matching the installed osmnx version.
    if version.parse(ox.__version__) >= version.parse("1.5.0"):
        download_features = ox.features_from_polygon
    else:
        download_features = ox.geometries_from_polygon

    progress = tqdm(product(area_wgs84[GEOMETRY_COLUMN], flat_tags), total=queries_count)
    for polygon, (key, value) in progress:
        progress.set_description(self._get_pbar_desc(key, value, pbar_desc_len))
        features = download_features(polygon, {key: value})
        if features.empty:
            continue
        # Keep only the geometry and the column of the queried tag key.
        downloaded_gdfs.append(features[[GEOMETRY_COLUMN, key]])

    grouped_gdf = self._group_gdfs(downloaded_gdfs).set_crs(WGS84_CRS)
    flat_index_gdf = self._flatten_index(grouped_gdf)

    return self._parse_features_gdf_to_groups(flat_index_gdf, tags)

OSMPbfLoader(
    pbf_file=None,
    download_source="geofabrik",
    download_directory="files",
)

Bases: OSMLoader

OSMPbfLoader.

OSM(OpenStreetMap)[1] PBF(Protocolbuffer Binary Format)[2] loader is a loader capable of loading OSM features from a PBF file. It filters features based on OSM tags[3] in form of key:value pairs, that are used by OSM users to give meaning to geometries.

This loader uses PbfFileReader from the QuackOSM[3] library. It utilizes the duckdb[4] engine with spatial[5] extension capable of parsing an *.osm.pbf file.

Additionally, it can download a pbf file extract for a given area using different sources.

References
  1. https://www.openstreetmap.org/
  2. https://wiki.openstreetmap.org/wiki/PBF_Format
  3. https://github.com/kraina-ai/quackosm
  4. https://duckdb.org/
  5. https://github.com/duckdb/duckdb_spatial
PARAMETER DESCRIPTION
pbf_file

Downloaded *.osm.pbf file to be used by the loader. If not provided, it will be automatically downloaded for a given area. Defaults to None.

TYPE: Union[str, Path] DEFAULT: None

download_source

Source to use when downloading PBF files. Can be one of: any, geofabrik, osmfr, bbbike. Defaults to "geofabrik".

TYPE: OsmExtractSource DEFAULT: 'geofabrik'

download_directory

Directory where to save the downloaded *.osm.pbf files. Ignored if pbf_file is provided. Defaults to "files".

TYPE: Union[str, Path] DEFAULT: 'files'

Source code in srai/loaders/osm_loaders/osm_pbf_loader.py
def __init__(
    self,
    pbf_file: Optional[Union[str, Path]] = None,
    download_source: "OsmExtractSource" = "geofabrik",
    download_directory: Union[str, Path] = "files",
) -> None:
    """
    Initialize OSMPbfLoader.

    Runs the optional-dependency check for the `osm` group (`quackosm` module)
    and stores the configuration on the instance.

    Args:
        pbf_file (Union[str, Path], optional): Downloaded `*.osm.pbf` file to be used by
            the loader. If not provided, it will be automatically downloaded for a given area.
            Defaults to None.
        download_source (OsmExtractSource, optional): Source to use when downloading PBF files.
            Can be one of: `any`, `geofabrik`, `osmfr`, `bbbike`.
            Defaults to "geofabrik".
        download_directory (Union[str, Path], optional): Directory where to save the downloaded
            `*.osm.pbf` files. Ignored if `pbf_file` is provided. Defaults to "files".
    """
    import_optional_dependencies(dependency_group="osm", modules=["quackosm"])
    self.pbf_file = pbf_file
    self.download_source = download_source
    self.download_directory = download_directory

load(
    area,
    tags,
    ignore_cache=False,
    explode_tags=True,
    keep_all_tags=False,
)

Load OSM features with specified tags for a given area from an *.osm.pbf file.

The loader will use provided *.osm.pbf file, or download extracts automatically. Later it will parse and filter features from files using PbfFileReader from QuackOSM library. It will return a GeoDataFrame containing the geometry column and columns for tag keys.

Note: Some key/value pairs might be missing from the resulting GeoDataFrame, simply because there are no such objects in the given area.

PARAMETER DESCRIPTION
area

Area for which to download objects.

TYPE: Union[BaseGeometry, Iterable[BaseGeometry], GeoSeries, GeoDataFrame]

tags

A dictionary specifying which tags to download. The keys should be OSM tags (e.g. building, amenity). The values should either be True for retrieving all objects with the tag, string for retrieving a single tag-value pair or list of strings for retrieving all values specified in the list. tags={'leisure': 'park'} would return parks from the area. tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']} would return parks, all amenity types, bakeries and bicycle shops.

TYPE: Union[OsmTagsFilter, GroupedOsmTagsFilter]

ignore_cache

Whether to ignore precalculated geoparquet files or not. Defaults to False.

TYPE: bool DEFAULT: False

explode_tags

Whether to split OSM tags into multiple columns or keep them in a single dict. Defaults to True.

TYPE: bool DEFAULT: True

keep_all_tags

Whether to keep all tags related to the element, or return only those defined in the tags filter. When True, will override the optional grouping defined in the tags filter. Defaults to False.

TYPE: bool DEFAULT: False

RAISES DESCRIPTION
ValueError

If PBF file is expected to be downloaded and provided geometries aren't shapely.geometry.Polygons.

RETURNS DESCRIPTION
GeoDataFrame

gpd.GeoDataFrame: Downloaded features as a GeoDataFrame.

Source code in srai/loaders/osm_loaders/osm_pbf_loader.py
def load(
    self,
    area: Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame],
    tags: Union[OsmTagsFilter, GroupedOsmTagsFilter],
    ignore_cache: bool = False,
    explode_tags: bool = True,
    keep_all_tags: bool = False,
) -> gpd.GeoDataFrame:
    """
    Load OSM features with specified tags for a given area from an `*.osm.pbf` file.

    The loader will use provided `*.osm.pbf` file, or download extracts
    automatically. Later it will parse and filter features from files
    using `PbfFileReader` from `QuackOSM` library. It will return a GeoDataFrame
    containing the `geometry` column followed by the alphabetically sorted
    tag columns that hold at least one non-null value.

    Note: Some key/value pairs might be missing from the resulting GeoDataFrame,
        simply because there are no such objects in the given area.

    Args:
        area (Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame]):
            Area for which to download objects.
        tags (Union[OsmTagsFilter, GroupedOsmTagsFilter]): A dictionary
            specifying which tags to download.
            The keys should be OSM tags (e.g. `building`, `amenity`).
            The values should either be `True` for retrieving all objects with the tag,
            string for retrieving a single tag-value pair
            or list of strings for retrieving all values specified in the list.
            `tags={'leisure': 'park'}` would return parks from the area.
            `tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']}`
            would return parks, all amenity types, bakeries and bicycle shops.
        ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files or not.
            Defaults to False.
        explode_tags (bool, optional): Whether to split OSM tags into multiple columns or keep
            them in a single dict. Defaults to True.
        keep_all_tags (bool, optional): Whether to keep all tags related to the element,
            or return only those defined in `tags`. When True, will override
            the optional grouping defined in `tags`. Defaults to False.

    Raises:
        ValueError: If PBF file is expected to be downloaded and provided geometries
            aren't shapely.geometry.Polygons.

    Returns:
        gpd.GeoDataFrame: Downloaded features as a GeoDataFrame.
    """
    area_wgs84 = self._prepare_area_gdf(area)
    pbf_reader = self._get_pbf_file_reader(area_wgs84, tags)

    # Options shared by both reader entry points.
    reader_options = {
        "keep_all_tags": keep_all_tags,
        "explode_tags": explode_tags,
        "ignore_cache": ignore_cache,
    }

    if self.pbf_file is None:
        # No file given: the reader works from the geometry filter alone.
        loaded_gdf = pbf_reader.get_features_gdf_from_geometry(**reader_options)
    else:
        loaded_gdf = pbf_reader.get_features_gdf(file_paths=self.pbf_file, **reader_options)

    loaded_gdf = loaded_gdf.set_crs(WGS84_CRS)

    # Drop tag columns that are entirely null.
    non_empty_columns = []
    for column in loaded_gdf.columns:
        if column == GEOMETRY_COLUMN:
            continue
        if loaded_gdf[column].notnull().any():
            non_empty_columns.append(column)

    return loaded_gdf[[GEOMETRY_COLUMN, *sorted(non_empty_columns)]]

load_to_geoparquet(
    area,
    tags,
    ignore_cache=False,
    explode_tags=True,
    keep_all_tags=False,
)

Load OSM features with specified tags for a given area and save it to geoparquet file.

PARAMETER DESCRIPTION
area

Area for which to download objects.

TYPE: Union[BaseGeometry, Iterable[BaseGeometry], GeoSeries, GeoDataFrame]

tags

A dictionary specifying which tags to download. The keys should be OSM tags (e.g. building, amenity). The values should either be True for retrieving all objects with the tag, string for retrieving a single tag-value pair or list of strings for retrieving all values specified in the list. tags={'leisure': 'park'} would return parks from the area. tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']} would return parks, all amenity types, bakeries and bicycle shops.

TYPE: Union[OsmTagsFilter, GroupedOsmTagsFilter]

ignore_cache

Whether to ignore precalculated geoparquet files or not. Defaults to False.

TYPE: bool DEFAULT: False

explode_tags

Whether to split OSM tags into multiple columns or keep them in a single dict. Defaults to True.

TYPE: bool DEFAULT: True

keep_all_tags

Whether to keep all tags related to the element, or return only those defined in the tags filter. When True, will override the optional grouping defined in the tags filter. Defaults to False.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
Path

Path to the saved GeoParquet file.

TYPE: Path

Source code in srai/loaders/osm_loaders/osm_pbf_loader.py
def load_to_geoparquet(
    self,
    area: Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame],
    tags: Union[OsmTagsFilter, GroupedOsmTagsFilter],
    ignore_cache: bool = False,
    explode_tags: bool = True,
    keep_all_tags: bool = False,
) -> Path:
    """
    Load OSM features with specified tags for a given area and save it to geoparquet file.

    When a `pbf_file` was given to the loader, that file is converted; otherwise
    the conversion is driven by the geometry filter alone.

    Args:
        area (Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame]):
            Area for which to download objects.
        tags (Union[OsmTagsFilter, GroupedOsmTagsFilter]): A dictionary
            specifying which tags to download.
            The keys should be OSM tags (e.g. `building`, `amenity`).
            The values should either be `True` for retrieving all objects with the tag,
            string for retrieving a single tag-value pair
            or list of strings for retrieving all values specified in the list.
            `tags={'leisure': 'park'}` would return parks from the area.
            `tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']}`
            would return parks, all amenity types, bakeries and bicycle shops.
        ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files or not.
            Defaults to False.
        explode_tags (bool, optional): Whether to split OSM tags into multiple columns or keep
            them in a single dict. Defaults to True.
        keep_all_tags (bool, optional): Whether to keep all tags related to the element,
            or return only those defined in `tags`. When True, will override
            the optional grouping defined in `tags`. Defaults to False.

    Returns:
        Path: Path to the saved GeoParquet file.
    """
    area_wgs84 = self._prepare_area_gdf(area)
    pbf_reader = self._get_pbf_file_reader(area_wgs84, tags)

    # Options shared by both conversion entry points.
    conversion_options = {
        "keep_all_tags": keep_all_tags,
        "explode_tags": explode_tags,
        "ignore_cache": ignore_cache,
    }

    if self.pbf_file is None:
        return pbf_reader.convert_geometry_filter_to_gpq(**conversion_options)

    return pbf_reader.convert_pbf_to_gpq(pbf_path=self.pbf_file, **conversion_options)

OSMTileLoader(
    tile_server_url,
    zoom,
    verbose=False,
    resource_type="png",
    auth_token=None,
    data_collector=None,
    storage_path=None,
)

OSM Tile Loader.

Download raster tiles from user specified tile server, like listed in [1]. Loader finds x, y coordinates [2] for specified area and downloads tiles. Address is built with schema {tile_server_url}/{zoom}/{x}/{y}.{resource_type}

References
  1. https://wiki.openstreetmap.org/wiki/Raster_tile_providers
  2. https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames
PARAMETER DESCRIPTION
tile_server_url

url of tile server, without z, x, y parameters

TYPE: str

zoom

zoom level [1]

TYPE: int

verbose

should print logs. Defaults to False.

TYPE: bool DEFAULT: False

resource_type

file extension. Added to the end of url. Defaults to "png".

TYPE: str DEFAULT: 'png'

auth_token

auth token. Added as access_token parameter to request. Defaults to None.

TYPE: str DEFAULT: None

data_collector

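DataCollector object or enum defining default collector. If `return`, uses InMemoryDataCollector. If `save`, uses SavingDataCollector. If None, uses InMemoryDataCollector. Defaults to None.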

TYPE: Union[str, DataCollector] DEFAULT: None

storage_path

path to save data, used with SavingDataCollector. Defaults to None.

TYPE: Union[str, Path] DEFAULT: None

References
  1. https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames
Source code in srai/loaders/osm_loaders/osm_tile_loader.py
def __init__(
    self,
    tile_server_url: str,
    zoom: int,
    verbose: bool = False,
    resource_type: str = "png",
    auth_token: Optional[str] = None,
    data_collector: Optional[Union[str, DataCollector]] = None,
    storage_path: Optional[Union[str, Path]] = None,
) -> None:
    """
    Initialize TileLoader.

    Args:
        tile_server_url (str): url of tile server, without z, x, y parameters.
            A trailing slash is optional.
        zoom (int): zoom level [1]
        verbose (bool, optional): should print logs. Defaults to False.
        resource_type (str, optional): file extension. Added to the end of url.
            Defaults to "png".
        auth_token (str, optional): auth token. Added as access_token parameter
            to request. Defaults to None.
        data_collector (Union[str, DataCollector], optional): DataCollector object or
            enum defining default collector.
            If `return`, uses InMemoryDataCollector.
            If `save`, uses SavingDataCollector.
            If None, uses InMemoryDataCollector. Defaults to None.
        storage_path (Union[str, Path], optional): path to save data,
            used with SavingDataCollector. Defaults to None.

    References:
        1. https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames
    """
    import_optional_dependencies(dependency_group="osm", modules=["PIL"])
    self.zoom = zoom
    self.verbose = verbose
    self.resource_type = resource_type
    # `urljoin` replaces the last path segment of a base URL that doesn't end
    # with "/", so "https://host/tiles" would silently lose "tiles". Normalizing
    # the trailing slash keeps the "{tile_server_url}/{zoom}/{x}/{y}" address schema.
    server_url = tile_server_url if tile_server_url.endswith("/") else f"{tile_server_url}/"
    self.base_url = urljoin(server_url, "{0}/{1}/{2}." + resource_type)
    self.auth_token = auth_token
    # Assigned before the collector is resolved, in case `_get_collector` reads it.
    self.save_path = storage_path
    self.data_collector = (
        self._get_collector(data_collector)
        if data_collector is not None
        else InMemoryDataCollector()
    )
    self.regionalizer = SlippyMapRegionalizer(zoom=self.zoom)

load(area)

Return all tiles of region.

PARAMETER DESCRIPTION
area

Area for which to download objects.

TYPE: Union[BaseGeometry, Iterable[BaseGeometry], GeoSeries, GeoDataFrame]

RETURNS DESCRIPTION
GeoDataFrame

gpd.GeoDataFrame: GeoDataFrame of regions in the area, with a `tile` column holding each tile as transformed by the DataCollector.

Source code in srai/loaders/osm_loaders/osm_tile_loader.py
def load(
    self,
    area: Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame],
) -> gpd.GeoDataFrame:
    """
    Return all tiles of region.

    The area is split into regions by the loader's regionalizer and a `tile`
    column is filled row by row with the result of `_get_tile_for_area`.

    Args:
        area (Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame]):
            Area for which to download objects.

    Returns:
        gpd.GeoDataFrame: Regions of the area with a `tile` column holding each tile
            as transformed by DataCollector.
    """
    tiles_gdf = self.regionalizer.transform(gdf=prepare_area_gdf_for_loader(area))
    tiles_gdf["tile"] = tiles_gdf.apply(self._get_tile_for_area, axis=1)
    return tiles_gdf

get_tile_by_x_y(x, y, idx=None)

Download single tile from tile server. Return tile processed by DataCollector.

PARAMETER DESCRIPTION
x(int)

x tile coordinate

y(int)

y tile coordinate

idx

ID of the tile. If None, it is created as {x}_{y}_{zoom}.

TYPE: Any DEFAULT: None

Source code in srai/loaders/osm_loaders/osm_tile_loader.py
def get_tile_by_x_y(self, x: int, y: int, idx: Any = None) -> Any:
    """
    Download single tile from tile server. Return tile processed by DataCollector.

    Args:
        x (int): x tile coordinate
        y (int): y tile coordinate
        idx (Any): ID of the tile. If None, it is created as `{x}_{y}_{zoom}`.

    Returns:
        Any: Whatever the DataCollector's `store` returns for the downloaded tile.
    """
    from PIL import Image

    tile_id = f"{x}_{y}_{self.zoom}" if idx is None else idx
    tile_url = self.base_url.format(self.zoom, x, y)
    if self.verbose:
        print(f"Getting tile from url: {tile_url}")

    # NOTE(review): the HTTP status is not checked here, so an error response
    # body is handed straight to PIL.
    response = requests.get(tile_url, params={"access_token": self.auth_token})
    image = Image.open(BytesIO(response.content))
    return self.data_collector.store(tile_id, image)

OSMNetworkType

Bases: str, Enum

Type of the street network.

See [1] for more details.

References
  1. https://osmnx.readthedocs.io/en/stable/osmnx.html#osmnx.graph.graph_from_place

OSMWayLoader(
    network_type,
    contain_within_area=False,
    preprocess=True,
    wide=True,
    metadata=False,
    osm_way_tags=constants.OSM_WAY_TAGS,
)

Bases: Loader

OSMWayLoader downloads road infrastructure from OSM.

OSMWayLoader loader is a wrapper for the osmnx.graph_from_polygon() and osmnx.graph_to_gdfs() that simplifies obtaining the road infrastructure data from OpenStreetMap. As the OSM data is often noisy, it can also take an opinionated approach to preprocessing it, with standardisation in mind - e.g. unification of units, discarding non-wiki values and rounding them.

PARAMETER DESCRIPTION
network_type

Type of the network to download.

TYPE: Union[OSMNetworkType, str]

contain_within_area

Whether to remove the roads that have one of their nodes outside of the given area. Defaults to False.

TYPE: bool DEFAULT: False

preprocess

Whether to preprocess the data. Defaults to True.

TYPE: bool DEFAULT: True

wide

Whether to return the roads in wide format. Defaults to True.

TYPE: bool DEFAULT: True

metadata

Whether to return metadata for roads. Defaults to False.

TYPE: bool DEFAULT: False

osm_way_tags

Dict of tags to take into consideration during computing. Defaults to constants.OSM_WAY_TAGS.

TYPE: Dict[str, List[str]] DEFAULT: OSM_WAY_TAGS

Source code in srai/loaders/osm_way_loader/osm_way_loader.py
def __init__(
    self,
    network_type: Union[OSMNetworkType, str],
    contain_within_area: bool = False,
    preprocess: bool = True,
    wide: bool = True,
    metadata: bool = False,
    osm_way_tags: dict[str, list[str]] = constants.OSM_WAY_TAGS,
) -> None:
    """
    Init OSMWayLoader.

    Args:
        network_type (Union[OSMNetworkType, str]):
            Type of the network to download.
        contain_within_area (bool): defaults to False
            Whether to remove the roads that have one of their nodes outside of the given area.
        preprocess (bool): defaults to True
            Whether to preprocess the data.
        wide (bool): defaults to True
            Whether to return the roads in wide format.
        metadata (bool): defaults to False
            Whether to return metadata for roads.
        osm_way_tags (Dict[str, List[str]]): defaults to constants.OSM_WAY_TAGS
            Dict of tags to take into consideration during computing.
            Maps each tag key to the list of its values.
    """
    import_optional_dependencies(dependency_group="osm", modules=["osmnx"])

    self.network_type = network_type
    self.contain_within_area = contain_within_area
    self.preprocess = preprocess
    self.wide = wide
    self.metadata = metadata
    self.osm_keys = list(osm_way_tags.keys())
    # Flat feature names are "<key>-<value>", except for the "oneway" key whose
    # values all collapse into the single name "oneway" (deduplicated by `distinct`).
    # The comparison is an exact match: the previous `not in ("oneway")` was a
    # substring test against a plain string, so keys such as "way" matched too.
    self.osm_tags_flat = (
        seq(osm_way_tags.items())
        .flat_map(
            lambda item: [
                item[0] if item[0] == "oneway" else f"{item[0]}-{value}" for value in item[1]
            ]
        )
        .distinct()
        .to_list()
    )

load(area)

Load road infrastructure for a given GeoDataFrame.

PARAMETER DESCRIPTION
area

(Multi)Polygons for which to download road infrastructure data.

TYPE: GeoDataFrame

RAISES DESCRIPTION
ValueError

If provided GeoDataFrame has no crs defined.

ValueError

If provided GeoDataFrame is empty.

TypeError

If provided geometries are not of type Polygon or MultiPolygon.

LoadedDataIsEmptyException

If none of the supplied area polygons contains any road infrastructure data.

RETURNS DESCRIPTION
tuple[GeoDataFrame, GeoDataFrame]

Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: Road infrastructure as (intersections, roads)

Source code in srai/loaders/osm_way_loader/osm_way_loader.py
def load(self, area: gpd.GeoDataFrame) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Load road infrastructure for a given GeoDataFrame.

    Note: this method overwrites the global `osmnx` settings `useful_tags_way`
    and `timeout`.

    Args:
        area (gpd.GeoDataFrame): (Multi)Polygons for which to download road infrastructure data.

    Raises:
        ValueError: If provided GeoDataFrame has no crs defined.
        ValueError: If provided GeoDataFrame is empty.
        TypeError: If provided geometries are not of type Polygon or MultiPolygon.
        LoadedDataIsEmptyException: If none of the supplied area polygons contains
            any road infrastructure data.

    Returns:
        Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: Road infrastructure as (intersections, roads)
    """
    import osmnx as ox

    ox.settings.useful_tags_way = constants.OSMNX_WAY_KEYS
    ox.settings.timeout = constants.OSMNX_TIMEOUT

    if area.empty:
        raise ValueError("Provided `area` GeoDataFrame is empty.")

    gdf_wgs84 = area.to_crs(crs=WGS84_CRS)

    gdf_nodes_raw, gdf_edges_raw = self._graph_from_gdf(gdf_wgs84)
    # Both parts of the result have to be present; the previous check tested
    # the edges twice and never looked at the nodes.
    if gdf_nodes_raw.empty or gdf_edges_raw.empty:
        raise LoadedDataIsEmptyException(
            "It can happen when there is no road infrastructure in the given area."
        )

    gdf_edges = self._explode_cols(gdf_edges_raw)

    if self.preprocess:
        gdf_edges = self._preprocess(gdf_edges)

    if self.wide:
        gdf_edges = self._to_wide(gdf_edges_raw, gdf_edges)

    gdf_edges = self._unify_index_and_columns_names(gdf_edges)

    return gdf_nodes_raw, gdf_edges

download_file(
    url, fname, chunk_size=1024, force_download=True
)

Download a file with progress bar.

PARAMETER DESCRIPTION
url

URL to download.

TYPE: str

fname

File name.

TYPE: str

chunk_size

Chunk size.

TYPE: int DEFAULT: 1024

force_download

Flag to force download even if file exists.

TYPE: bool DEFAULT: True

Source: https://gist.github.com/yanqd0/c13ed29e29432e3cf3e7c38467f42f51

Source code in srai/loaders/download.py
def download_file(
    url: str, fname: str, chunk_size: int = 1024, force_download: bool = True
) -> None:
    """
    Download a file with progress bar.

    Missing parent directories of the target file are created. The response is
    streamed to disk in chunks and the connection is released once the download
    finishes or fails.

    Args:
        url (str): URL to download.
        fname (str): File name (path of the target file).
        chunk_size (int): Chunk size in bytes used when streaming the response.
        force_download (bool): Flag to force download even if file exists.

    Raises:
        requests.HTTPError: If the server responds with an error status code.

    Source: https://gist.github.com/yanqd0/c13ed29e29432e3cf3e7c38467f42f51
    """
    destination = Path(fname)
    if destination.exists() and not force_download:
        warnings.warn("File exists. Skipping download.", stacklevel=1)
        return

    destination.parent.mkdir(parents=True, exist_ok=True)
    # The response is used as a context manager so that the streamed connection
    # is always closed, also when writing the file fails midway.
    with requests.get(
        url,
        headers={"User-Agent": "SRAI Python package (https://github.com/kraina-ai/srai)"},
        stream=True,
    ) as resp:
        resp.raise_for_status()
        # Servers may omit the header; tqdm then shows progress without a total.
        total = int(resp.headers.get("content-length", 0))
        with (
            open(destination, "wb") as file,
            tqdm(
                # `Path.name` works for any path separator and for Path objects.
                desc=destination.name,
                total=total,
                unit="iB",
                unit_scale=True,
                unit_divisor=1024,
            ) as bar,
        ):
            for data in resp.iter_content(chunk_size=chunk_size):
                size = file.write(data)
                bar.update(size)