Index

This module contains loaders, used to load spatial data from different sources.

We want to unify loading from different data sources into a single interface. Thanks to this, we have a unified spatial data format, which makes it possible to feed them into any of the embedding methods available in this library.

Loader

¶

Bases: ABC

Abstract class for loaders.

load(*args, **kwargs)

¶

abstractmethod

Load data for a given area.

PARAMETER	DESCRIPTION
`*args`	Positional arguments depending on a specific loader. TYPE: `Any` DEFAULT: `()`
`**kwargs`	Keyword arguments depending on a specific loader. TYPE: `Any` DEFAULT: `{}`

RETURNS	DESCRIPTION
`GeoDataFrame`	GeoDataFrame with the downloaded data.

Source code in srai/loaders/_base.py

@abc.abstractmethod
def load(self, *args: Any, **kwargs: Any) -> gpd.GeoDataFrame:  # pragma: no cover
    """
    Load data for a given area.

    This is the abstract entry point of the loader interface. The base implementation
    only raises, so every concrete loader has to provide its own `load`.

    Args:
        *args: Positional arguments depending on a specific loader.
        **kwargs: Keyword arguments depending on a specific loader.

    Raises:
        NotImplementedError: Always, when the base implementation is called.

    Returns:
        GeoDataFrame with the downloaded data.
    """
    raise NotImplementedError

GeoparquetLoader

¶

Bases: Loader

GeoparquetLoader.

Geoparquet [1] loader is a wrapper for a geopandas.read_parquet function and allows for an automatic index setting and additional geometry clipping.

References

https://github.com/opengeospatial/geoparquet

load(file_path, index_column=None, columns=None, area=None)

¶

Load a geoparquet file.

PARAMETER	DESCRIPTION
`file_path`	parquet file path. TYPE: `Union[Path, str]`
`index_column`	Column that will be used as an index. If not provided, automatic indexing will be applied by default. Defaults to None. TYPE: `str` DEFAULT: `None`
`columns`	List of columns to load. If not provided, all will be loaded. Defaults to None. TYPE: `List[str]` DEFAULT: `None`
`area`	Mask to clip loaded data. If not provided, unaltered data will be returned. Defaults to None. TYPE: `GeoDataFrame` DEFAULT: `None`

RAISES	DESCRIPTION
`ValueError`	If provided index column doesn't exist in list of loaded columns.

RETURNS	DESCRIPTION
`GeoDataFrame`	gpd.GeoDataFrame: Loaded geoparquet file as a GeoDataFrame.

Source code in srai/loaders/geoparquet_loader.py

def load(
    self,
    file_path: Union[Path, str],
    index_column: Optional[str] = None,
    columns: Optional[list[str]] = None,
    area: Optional[gpd.GeoDataFrame] = None,
) -> gpd.GeoDataFrame:
    """
    Load a geoparquet file.

    The file is read with `geopandas.read_parquet`, optionally re-indexed, reprojected
    to `WGS84_CRS` and optionally clipped to the given area.

    Args:
        file_path (Union[Path, str]): parquet file path.
        index_column (str, optional): Column that will be used as an index.
            It has to be present among the loaded columns.
            If not provided, automatic indexing will be applied by default. Defaults to None.
        columns (List[str], optional): List of columns to load. The geometry column is
            always loaded in addition to the listed ones. The passed list is not modified.
            If not provided, all will be loaded. Defaults to None.
        area (gpd.GeoDataFrame, optional): Mask to clip loaded data. It is reprojected
            to `WGS84_CRS` before clipping.
            If not provided, unaltered data will be returned. Defaults to None.

    Raises:
        ValueError: If provided index column doesn't exist in list of loaded columns.

    Returns:
        gpd.GeoDataFrame: Loaded geoparquet file as a GeoDataFrame.
    """
    if columns and GEOMETRY_COLUMN not in columns:
        # Build a new list rather than appending in place, so the caller's
        # `columns` object is left untouched (and non-list sequences work too).
        columns = [*columns, GEOMETRY_COLUMN]

    gdf = gpd.read_parquet(path=file_path, columns=columns)

    if index_column:
        if index_column not in gdf.columns:
            raise ValueError(f"Column {index_column} doesn't exist in a file.")
        gdf.set_index(index_column, inplace=True)

    gdf.to_crs(crs=WGS84_CRS, inplace=True)

    if area is not None:
        # Clip in a common CRS; `keep_geom_type=False` keeps geometries whose type
        # changes as a result of the clipping.
        area_wgs84 = area.to_crs(crs=WGS84_CRS)
        gdf = gdf.clip(mask=area_wgs84, keep_geom_type=False)

    return gdf

GTFSLoader()

¶

Bases: Loader

GTFSLoader.

This loader is capable of reading GTFS feed and calculates time aggregations in 1H slots.

Source code in srai/loaders/gtfs_loader.py

def __init__(self) -> None:
    """
    Initialize GTFS loader.

    Calls `import_optional_dependencies` for the `gtfs` dependency group (module
    `gtfs_kit`) and stores the time resolution used by this loader.
    """
    # NOTE(review): presumably fails early when `gtfs_kit` is not installed - confirm
    # against the helper's implementation.
    import_optional_dependencies(dependency_group="gtfs", modules=["gtfs_kit"])

    # Resolution string for time aggregations ("1H", i.e. one-hour slots).
    self.time_resolution = "1H"

load(
    gtfs_file,
    fail_on_validation_errors=True,
    skip_validation=False,
)

¶

Load GTFS feed and calculate time aggregations for stops.

PARAMETER	DESCRIPTION
`gtfs_file`	Path to the GTFS feed. TYPE: `Path`
`fail_on_validation_errors`	Fail if GTFS feed is invalid. Ignored when skip_validation is True. TYPE: `bool` DEFAULT: `True`
`skip_validation`	Skip GTFS feed validation. TYPE: `bool` DEFAULT: `False`

RETURNS	DESCRIPTION
`GeoDataFrame`	gpd.GeoDataFrame: GeoDataFrame with trip counts and list of directions for stops.

Source code in srai/loaders/gtfs_loader.py

def load(
    self,
    gtfs_file: Path,
    fail_on_validation_errors: bool = True,
    skip_validation: bool = False,
) -> gpd.GeoDataFrame:
    """
    Read a GTFS feed and return per-stop aggregations with stop geometries.

    The feed is read with `gtfs_kit`, optionally validated, and the data returned by
    `_load_trips` is joined with stop locations (inner join on `stop_id`) and then
    with the data returned by `_load_directions` (left join on `stop_id`).

    Args:
        gtfs_file (Path): Path to the GTFS feed.
        fail_on_validation_errors (bool): Fail if GTFS feed is invalid. Ignored when
            skip_validation is True.
        skip_validation (bool): Skip GTFS feed validation.

    Returns:
        gpd.GeoDataFrame: GeoDataFrame with trip counts and list of directions for stops.
    """
    import gtfs_kit as gk

    gtfs_feed = gk.read_feed(gtfs_file, dist_units="km")

    if not skip_validation:
        self._validate_feed(gtfs_feed, fail=fail_on_validation_errors)

    trip_counts = self._load_trips(gtfs_feed)
    stop_directions = self._load_directions(gtfs_feed)

    def _stop_to_point(stop_row):  # type: ignore[no-untyped-def]
        """Build a point in (lon, lat) order from a single stops row."""
        return Point([stop_row["stop_lon"], stop_row["stop_lat"]])

    stop_locations = gtfs_feed.stops[["stop_id", "stop_lat", "stop_lon"]].set_index("stop_id")
    stop_locations[GEOMETRY_COLUMN] = stop_locations.apply(_stop_to_point, axis=1)

    # Only stops present in the trips data are kept (inner join).
    trips_with_geometry = trip_counts.merge(
        stop_locations[GEOMETRY_COLUMN], how="inner", on="stop_id"
    )

    features = gpd.GeoDataFrame(
        trips_with_geometry,
        geometry=GEOMETRY_COLUMN,
        crs=WGS84_CRS,
    ).merge(stop_directions, how="left", on="stop_id")
    features.index.name = FEATURES_INDEX

    return features

OSMLoader

¶

Bases: Loader, ABC

Abstract class for loaders.

load(area, tags)

¶

abstractmethod

Load data for a given area.

PARAMETER	DESCRIPTION
`area`	Shapely geometry with the area of interest. TYPE: `Union[BaseGeometry, Iterable[BaseGeometry], GeoSeries, GeoDataFrame]`
`tags`	OSM tags filter. TYPE: `Union[OsmTagsFilter, GroupedOsmTagsFilter]`

RETURNS	DESCRIPTION
`GeoDataFrame`	gpd.GeoDataFrame: GeoDataFrame with the downloaded data.

Source code in srai/loaders/osm_loaders/_base.py

@abc.abstractmethod
def load(
    self,
    area: Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame],
    tags: Union[OsmTagsFilter, GroupedOsmTagsFilter],
) -> gpd.GeoDataFrame:  # pragma: no cover
    """
    Load OSM data for a given area.

    This is the abstract entry point shared by OSM loaders. The base implementation
    only raises, so every concrete OSM loader has to provide its own `load`.

    Args:
        area (Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame]):
            Shapely geometry with the area of interest.
        tags (Union[OsmTagsFilter, GroupedOsmTagsFilter]): OSM tags filter.

    Raises:
        NotImplementedError: Always, when the base implementation is called.

    Returns:
        gpd.GeoDataFrame: GeoDataFrame with the downloaded data.
    """
    raise NotImplementedError

OSMOnlineLoader()

¶

Bases: OSMLoader

OSMOnlineLoader.

OSM(OpenStreetMap)[1] online loader is a loader capable of downloading objects from a given area from OSM. It filters features based on OSM tags[2] in form of key:value pairs, that are used by OSM users to give meaning to geometries.

This loader is a wrapper around the osmnx library. It uses osmnx.geometries_from_polygon to make individual queries.

References

Source code in srai/loaders/osm_loaders/osm_online_loader.py

def __init__(self) -> None:
    """
    Initialize OSMOnlineLoader.

    Calls `import_optional_dependencies` for the `osm` dependency group (module `osmnx`).
    """
    # NOTE(review): presumably fails early when `osmnx` is not installed - confirm
    # against the helper's implementation.
    import_optional_dependencies(dependency_group="osm", modules=["osmnx"])

load(area, tags)

¶

Download OSM features with specified tags for a given area.

The loader first downloads all objects with tags. It returns a GeoDataFrame containing the geometry column and columns for tag keys.

Some key/value pairs might be missing from the resulting GeoDataFrame,

simply because there are no such objects in the given area.

PARAMETER DESCRIPTION

area

Area for which to download objects.

TYPE: Union[BaseGeometry, Iterable[BaseGeometry], GeoSeries, GeoDataFrame]

tags

A dictionary specifying which tags to download. The keys should be OSM tags (e.g. building, amenity). The values should either be True for retrieving all objects with the tag, string for retrieving a single tag-value pair or list of strings for retrieving all values specified in the list. tags={'leisure': 'park'} would return parks from the area. tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']} would return parks, all amenity types, bakeries and bicycle shops.

TYPE: Union[OsmTagsFilter, GroupedOsmTagsFilter]

RETURNS	DESCRIPTION
`GeoDataFrame`	gpd.GeoDataFrame: Downloaded features as a GeoDataFrame.

Source code in srai/loaders/osm_loaders/osm_online_loader.py

def load(
    self,
    area: Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame],
    tags: Union[OsmTagsFilter, GroupedOsmTagsFilter],
) -> gpd.GeoDataFrame:
    """
    Download OSM features with specified tags for a given area.

    One query is sent to OSM (through `osmnx`) for every combination of an area geometry
    and a single flattened key/value tag pair. The non-empty responses are combined into
    one GeoDataFrame containing the `geometry` column and columns for tag keys.

    Note: Some key/value pairs might be missing from the resulting GeoDataFrame,
        simply because there are no such objects in the given area.

    Args:
        area (Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame]):
            Area for which to download objects.
        tags (Union[OsmTagsFilter, GroupedOsmTagsFilter]): A dictionary
            specifying which tags to download.
            The keys should be OSM tags (e.g. `building`, `amenity`).
            The values should either be `True` for retrieving all objects with the tag,
            string for retrieving a single tag-value pair
            or list of strings for retrieving all values specified in the list.
            `tags={'leisure': 'park'}` would return parks from the area.
            `tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']}`
            would return parks, all amenity types, bakeries and bicycle shops.

    Returns:
        gpd.GeoDataFrame: Downloaded features as a GeoDataFrame.
    """
    import osmnx as ox

    query_areas = self._prepare_area_gdf(area)
    tag_pairs = self._flatten_tags(merge_osm_tags_filter(tags))

    queries_count = len(query_areas) * len(tag_pairs)

    # Width of the progress bar description: longest key/value name plus the
    # length of the description template rendered with empty values.
    description_width = self._get_max_key_value_name_len(tag_pairs) + len(
        self._PBAR_FORMAT.format("", "")
    )

    # From osmnx 1.5.0 on use `features_from_polygon`,
    # otherwise fall back to `geometries_from_polygon`.
    if version.parse(ox.__version__) >= version.parse("1.5.0"):
        download_features = ox.features_from_polygon
    else:
        download_features = ox.geometries_from_polygon

    downloaded = []
    progress = tqdm(product(query_areas[GEOMETRY_COLUMN], tag_pairs), total=queries_count)
    for polygon, (key, value) in progress:
        progress.set_description(self._get_pbar_desc(key, value, description_width))
        found = download_features(polygon, {key: value})
        if found.empty:
            continue
        # Keep only the geometry and the column of the queried tag key.
        downloaded.append(found[[GEOMETRY_COLUMN, key]])

    features = self._flatten_index(self._group_gdfs(downloaded).set_crs(WGS84_CRS))

    return self._parse_features_gdf_to_groups(features, tags)

OSMPbfLoader(
    pbf_file=None,
    download_source="geofabrik",
    download_directory="files",
)

¶

Bases: OSMLoader

OSMPbfLoader.

OSM(OpenStreetMap)[1] PBF(Protocolbuffer Binary Format)[2] loader is a loader capable of loading OSM features from a PBF file. It filters features based on OSM tags[3] in form of key:value pairs, that are used by OSM users to give meaning to geometries.

This loader uses PbfFileReader from the QuackOSM[3] library. It utilizes the duckdb[4] engine with spatial[5] extension capable of parsing an *.osm.pbf file.

Additionally, it can download a pbf file extract for a given area using different sources.

References

PARAMETER	DESCRIPTION
`pbf_file`	Downloaded `.osm.pbf` file to be used by the loader. If not provided, it will be automatically downloaded for a given area. Defaults to None. TYPE: `Union[str, Path]` DEFAULT: `None`
`download_source`	Source to use when downloading PBF files. Can be one of: `any`, `geofabrik`, `osmfr`, `bbbike`. Defaults to "geofabrik". TYPE: `OsmExtractSource` DEFAULT: `'geofabrik'`
`download_directory`	Directory where to save the downloaded `.osm.pbf` files. Ignored if `pbf_file` is provided. Defaults to "files". TYPE: `Union[str, Path]` DEFAULT: `'files'`

Source code in srai/loaders/osm_loaders/osm_pbf_loader.py

def __init__(
    self,
    pbf_file: Optional[Union[str, Path]] = None,
    download_source: "OsmExtractSource" = "geofabrik",
    download_directory: Union[str, Path] = "files",
) -> None:
    """
    Initialize OSMPbfLoader.

    Args:
        pbf_file (Union[str, Path], optional): Downloaded `*.osm.pbf` file to be used by
            the loader. If not provided, it will be automatically downloaded for a given area.
            Defaults to None.
        download_source (OsmExtractSource, optional): Source to use when downloading PBF files.
            Can be one of: `any`, `geofabrik`, `osmfr`, `bbbike`.
            Defaults to "geofabrik".
        download_directory (Union[str, Path], optional): Directory where to save the downloaded
            `*.osm.pbf` files. Ignored if `pbf_file` is provided. Defaults to "files".
    """
    # NOTE(review): presumably fails early when `quackosm` is not installed - confirm
    # against the helper's implementation.
    import_optional_dependencies(dependency_group="osm", modules=["quackosm"])
    # The constructor only stores the configuration on the instance.
    self.pbf_file = pbf_file
    self.download_source = download_source
    self.download_directory = download_directory

load(
    area,
    tags,
    ignore_cache=False,
    explode_tags=True,
    keep_all_tags=False,
)

¶

Load OSM features with specified tags for a given area from an *.osm.pbf file.

The loader will use provided *.osm.pbf file, or download extracts automatically. Later it will parse and filter features from files using PbfFileReader from QuackOSM library. It will return a GeoDataFrame containing the geometry column and columns for tag keys.

Some key/value pairs might be missing from the resulting GeoDataFrame,

simply because there are no such objects in the given area.

PARAMETER	DESCRIPTION
`area`	Area for which to download objects. TYPE: `Union[BaseGeometry, Iterable[BaseGeometry], GeoSeries, GeoDataFrame]`
`tags`	A dictionary specifying which tags to download. The keys should be OSM tags (e.g. `building`, `amenity`). The values should either be `True` for retrieving all objects with the tag, string for retrieving a single tag-value pair or list of strings for retrieving all values specified in the list. `tags={'leisure': 'park'}` would return parks from the area. `tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']}` would return parks, all amenity types, bakeries and bicycle shops. TYPE: `Union[OsmTagsFilter, GroupedOsmTagsFilter]`
`ignore_cache`	Whether to ignore precalculated geoparquet files or not. Defaults to False. TYPE: `bool` DEFAULT: `False`
`explode_tags`	Whether to split OSM tags into multiple columns or keep them in a single dict. Defaults to True. TYPE: `bool` DEFAULT: `True`
`keep_all_tags`	Whether to keep all tags related to the element, or return only those defined in the `tags_filter`. When True, will override the optional grouping defined in the `tags_filter`. Defaults to False. TYPE: `bool` DEFAULT: `False`

RAISES	DESCRIPTION
`ValueError`	If PBF file is expected to be downloaded and provided geometries aren't shapely.geometry.Polygons.

RETURNS	DESCRIPTION
`GeoDataFrame`	gpd.GeoDataFrame: Downloaded features as a GeoDataFrame.

Source code in srai/loaders/osm_loaders/osm_pbf_loader.py

def load(
    self,
    area: Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame],
    tags: Union[OsmTagsFilter, GroupedOsmTagsFilter],
    ignore_cache: bool = False,
    explode_tags: bool = True,
    keep_all_tags: bool = False,
) -> gpd.GeoDataFrame:
    """
    Load OSM features with specified tags for a given area from an `*.osm.pbf` file.

    When the loader was created with a `pbf_file`, features are read from that file;
    otherwise the reader is asked to resolve the features from the area geometry.
    The features are parsed and filtered by `PbfFileReader` from the `QuackOSM` library.
    The result contains the `geometry` column followed by the alphabetically sorted
    columns that hold at least one non-null value.

    Note: Some key/value pairs might be missing from the resulting GeoDataFrame,
        simply because there are no such objects in the given area.

    Args:
        area (Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame]):
            Area for which to download objects.
        tags (Union[OsmTagsFilter, GroupedOsmTagsFilter]): A dictionary
            specifying which tags to download.
            The keys should be OSM tags (e.g. `building`, `amenity`).
            The values should either be `True` for retrieving all objects with the tag,
            string for retrieving a single tag-value pair
            or list of strings for retrieving all values specified in the list.
            `tags={'leisure': 'park'}` would return parks from the area.
            `tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']}`
            would return parks, all amenity types, bakeries and bicycle shops.
        ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files
            or not. Defaults to False.
        explode_tags (bool, optional): Whether to split OSM tags into multiple columns or
            keep them in a single dict. Defaults to True.
        keep_all_tags (bool, optional): Whether to keep all tags related to the element,
            or return only those defined in the `tags_filter`. When True, will override
            the optional grouping defined in the `tags_filter`. Defaults to False.

    Raises:
        ValueError: If PBF file is expected to be downloaded and provided geometries
            aren't shapely.geometry.Polygons.

    Returns:
        gpd.GeoDataFrame: Downloaded features as a GeoDataFrame.
    """
    prepared_area = self._prepare_area_gdf(area)
    reader = self._get_pbf_file_reader(prepared_area, tags)

    # Options shared by both reader entry points.
    reader_options = {
        "keep_all_tags": keep_all_tags,
        "explode_tags": explode_tags,
        "ignore_cache": ignore_cache,
    }

    if self.pbf_file is None:
        loaded = reader.get_features_gdf_from_geometry(**reader_options)
    else:
        loaded = reader.get_features_gdf(file_paths=self.pbf_file, **reader_options)

    loaded = loaded.set_crs(WGS84_CRS)

    # Drop columns without any value and order the remaining ones by name.
    non_empty_columns = sorted(
        name
        for name in loaded.columns
        if name != GEOMETRY_COLUMN and loaded[name].notnull().any()
    )

    return loaded[[GEOMETRY_COLUMN, *non_empty_columns]]

load_to_geoparquet(
    area,
    tags,
    ignore_cache=False,
    explode_tags=True,
    keep_all_tags=False,
)

¶

Load OSM features with specified tags for a given area and save it to geoparquet file.

PARAMETER	DESCRIPTION
`area`	Area for which to download objects. TYPE: `Union[BaseGeometry, Iterable[BaseGeometry], GeoSeries, GeoDataFrame]`
`tags`	A dictionary specifying which tags to download. The keys should be OSM tags (e.g. `building`, `amenity`). The values should either be `True` for retrieving all objects with the tag, string for retrieving a single tag-value pair or list of strings for retrieving all values specified in the list. `tags={'leisure': 'park'}` would return parks from the area. `tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']}` would return parks, all amenity types, bakeries and bicycle shops. TYPE: `Union[OsmTagsFilter, GroupedOsmTagsFilter]`
`ignore_cache`	Whether to ignore precalculated geoparquet files or not. Defaults to False. TYPE: `bool` DEFAULT: `False`
`explode_tags`	Whether to split OSM tags into multiple columns or keep them in a single dict. Defaults to True. TYPE: `bool` DEFAULT: `True`
`keep_all_tags`	Whether to keep all tags related to the element, or return only those defined in the `tags_filter`. When True, will override the optional grouping defined in the `tags_filter`. Defaults to False. TYPE: `bool` DEFAULT: `False`

RETURNS	DESCRIPTION
`Path`	Path to the saved GeoParquet file. TYPE: `Path`

Source code in srai/loaders/osm_loaders/osm_pbf_loader.py

def load_to_geoparquet(
    self,
    area: Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame],
    tags: Union[OsmTagsFilter, GroupedOsmTagsFilter],
    ignore_cache: bool = False,
    explode_tags: bool = True,
    keep_all_tags: bool = False,
) -> Path:
    """
    Load OSM features with specified tags for a given area and save it to geoparquet file.

    When the loader was created with a `pbf_file`, that file is converted; otherwise
    the reader is asked to run the conversion based on the area geometry.

    Args:
        area (Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame]):
            Area for which to download objects.
        tags (Union[OsmTagsFilter, GroupedOsmTagsFilter]): A dictionary
            specifying which tags to download.
            The keys should be OSM tags (e.g. `building`, `amenity`).
            The values should either be `True` for retrieving all objects with the tag,
            string for retrieving a single tag-value pair
            or list of strings for retrieving all values specified in the list.
            `tags={'leisure': 'park'}` would return parks from the area.
            `tags={'leisure': 'park', 'amenity': True, 'shop': ['bakery', 'bicycle']}`
            would return parks, all amenity types, bakeries and bicycle shops.
        ignore_cache (bool, optional): Whether to ignore precalculated geoparquet files
            or not. Defaults to False.
        explode_tags (bool, optional): Whether to split OSM tags into multiple columns or
            keep them in a single dict. Defaults to True.
        keep_all_tags (bool, optional): Whether to keep all tags related to the element,
            or return only those defined in the `tags_filter`. When True, will override
            the optional grouping defined in the `tags_filter`. Defaults to False.

    Returns:
        Path: Path to the saved GeoParquet file.
    """
    prepared_area = self._prepare_area_gdf(area)
    reader = self._get_pbf_file_reader(prepared_area, tags)

    # Options shared by both conversion entry points.
    conversion_options = {
        "keep_all_tags": keep_all_tags,
        "explode_tags": explode_tags,
        "ignore_cache": ignore_cache,
    }

    saved_path: Path
    if self.pbf_file is None:
        saved_path = reader.convert_geometry_filter_to_gpq(**conversion_options)
    else:
        saved_path = reader.convert_pbf_to_gpq(pbf_path=self.pbf_file, **conversion_options)

    return saved_path

OSMTileLoader(
    tile_server_url,
    zoom,
    verbose=False,
    resource_type="png",
    auth_token=None,
    data_collector=None,
    storage_path=None,
)

¶

OSM Tile Loader.

Download raster tiles from a user-specified tile server, such as those listed in [1]. The loader finds x, y coordinates [2] for the specified area and downloads the tiles. The address is built with the schema {tile_server_url}/{zoom}/{x}/{y}.{resource_type}

References

PARAMETER	DESCRIPTION
`tile_server_url`	url of tile server, without z, x, y parameters TYPE: `str`
`zoom`	zoom level [1] TYPE: `int`
`verbose`	should print logs. Defaults to False. TYPE: `bool` DEFAULT: `False`
`resource_type`	file extension. Added to the end of url. Defaults to "png". TYPE: `str` DEFAULT: `'png'`
`auth_token`	auth token. Added as access_token parameter to request. Defaults to None. TYPE: `str` DEFAULT: `None`
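`data_collector`	DataCollector object or enum defining default collector. If None uses InMemoryDataCollector. If `return` uses InMemoryDataCollector. If `save` uses SavingDataCollector. Defaults to None. TYPE: `Union[str, DataCollector]` DEFAULT: `None`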
`storage_path`	path to save data, used with SavingDataCollector. Defaults to None. TYPE: `Union[str, Path]` DEFAULT: `None`

References

https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames

Source code in srai/loaders/osm_loaders/osm_tile_loader.py

def __init__(
    self,
    tile_server_url: str,
    zoom: int,
    verbose: bool = False,
    resource_type: str = "png",
    auth_token: Optional[str] = None,
    data_collector: Optional[Union[str, DataCollector]] = None,
    storage_path: Optional[Union[str, Path]] = None,
) -> None:
    """
    Initialize TileLoader.

    Tile addresses are built with the schema
    `{tile_server_url}/{zoom}/{x}/{y}.{resource_type}`.

    Args:
        tile_server_url (str): url of tile server, without z, x, y parameters.
            A trailing slash is optional.
        zoom (int): zoom level [1]
        verbose (bool, optional): should print logs. Defaults to False.
        resource_type (str, optional): file extension. Added to the end of url.
            Defaults to "png".
        auth_token (str, optional): auth token. Added as access_token parameter
            to request. Defaults to None.
        data_collector (Union[str, DataCollector], optional): DataCollector object or
            enum defining default collector. If None uses InMemoryDataCollector.
            If `return` uses InMemoryDataCollector.
            If `save` uses SavingDataCollector.
            Defaults to None.
        storage_path (Union[str, Path], optional): path to save data,
            used with SavingDataCollector. Defaults to None.

    References:
        1. https://wiki.openstreetmap.org/wiki/Slippy_map_tilenames
    """
    import_optional_dependencies(dependency_group="osm", modules=["PIL"])
    self.zoom = zoom
    self.verbose = verbose
    self.resource_type = resource_type
    # `urljoin` replaces the last path segment of a base url that has no trailing
    # slash (e.g. "https://host/tiles" + "1/2/3.png" -> "https://host/1/2/3.png").
    # Normalizing the base keeps the whole server url, as the documented schema expects.
    server_root = tile_server_url if tile_server_url.endswith("/") else f"{tile_server_url}/"
    # Template with positional placeholders for zoom, x and y, filled in per tile.
    self.base_url = urljoin(server_root, "{0}/{1}/{2}." + resource_type)
    self.auth_token = auth_token
    # `save_path` has to be set before the collector is resolved.
    # NOTE(review): `_get_collector` presumably reads it for the saving collector - confirm.
    self.save_path = storage_path
    self.data_collector = (
        self._get_collector(data_collector)
        if data_collector is not None
        else InMemoryDataCollector()
    )
    self.regionalizer = SlippyMapRegionalizer(zoom=self.zoom)

load(area)

¶

Return all tiles of region.

PARAMETER	DESCRIPTION
`area`	Area for which to download objects. TYPE: `Union[BaseGeometry, Iterable[BaseGeometry], GeoSeries, GeoDataFrame]`

RETURNS	DESCRIPTION
`GeoDataFrame`	gpd.GeoDataFrame: GeoDataFrame of tiles for each region in area, transformed by DataCollector

Source code in srai/loaders/osm_loaders/osm_tile_loader.py

def load(
    self,
    area: Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame],
) -> gpd.GeoDataFrame:
    """
    Return all tiles of region.

    The area is split into regions by the loader's regionalizer and a `tile` column
    is added, holding the result of `_get_tile_for_area` for every region row.

    Args:
        area (Union[BaseGeometry, Iterable[BaseGeometry], gpd.GeoSeries, gpd.GeoDataFrame]):
            Area for which to download objects.

    Returns:
        gpd.GeoDataFrame: GeoDataFrame of regions in the area with a `tile` column
            containing tiles transformed by DataCollector.
    """
    tiles_gdf = self.regionalizer.transform(gdf=prepare_area_gdf_for_loader(area))
    tiles_gdf["tile"] = tiles_gdf.apply(self._get_tile_for_area, axis=1)
    return tiles_gdf

get_tile_by_x_y(x, y, idx=None)

¶

Download single tile from tile server. Return tile processed by DataCollector.

PARAMETER	DESCRIPTION
`x`	x tile coordinate TYPE: `int`
`y`	y tile coordinate TYPE: `int`
`idx`	id of tile; if None, it is created as `{x}_{y}_{zoom}`. TYPE: `Any` DEFAULT: `None`

Source code in srai/loaders/osm_loaders/osm_tile_loader.py

def get_tile_by_x_y(self, x: int, y: int, idx: Any = None) -> Any:
    """
    Download single tile from tile server. Return tile processed by DataCollector.

    Args:
        x (int): x tile coordinate
        y (int): y tile coordinate
        idx (Any): id of tile; if None, it is created as `{x}_{y}_{zoom}`.

    Returns:
        Any: Result of storing the downloaded tile image in the data collector.
    """
    from PIL import Image

    tile_id = f"{x}_{y}_{self.zoom}" if idx is None else idx
    tile_url = self.base_url.format(self.zoom, x, y)
    if self.verbose:
        print(f"Getting tile from url: {tile_url}")

    # The auth token is sent as the `access_token` query parameter.
    response = requests.get(tile_url, params={"access_token": self.auth_token})
    image = Image.open(BytesIO(response.content))
    return self.data_collector.store(tile_id, image)

OSMNetworkType

¶

Bases: str, Enum

Type of the street network.

See [1] for more details.

References

https://osmnx.readthedocs.io/en/stable/osmnx.html#osmnx.graph.graph_from_place

OSMWayLoader(
    network_type,
    contain_within_area=False,
    preprocess=True,
    wide=True,
    metadata=False,
    osm_way_tags=constants.OSM_WAY_TAGS,
)

¶

Bases: Loader

OSMWayLoader downloads road infrastructure from OSM.

OSMWayLoader loader is a wrapper for the osmnx.graph_from_polygon() and osmnx.graph_to_gdfs() that simplifies obtaining the road infrastructure data from OpenStreetMap. As the OSM data is often noisy, it can also take an opinionated approach to preprocessing it, with standardisation in mind - e.g. unification of units, discarding non-wiki values and rounding them.

PARAMETER	DESCRIPTION
`network_type`	Type of the network to download. TYPE: `Union[OSMNetworkType, str]`
`contain_within_area`	Whether to remove the roads that have one of their nodes outside of the given area. Defaults to False. TYPE: `bool` DEFAULT: `False`
`preprocess`	Whether to preprocess the data. Defaults to True. TYPE: `bool` DEFAULT: `True`
`wide`	Whether to return the roads in wide format. Defaults to True. TYPE: `bool` DEFAULT: `True`
`metadata`	Whether to return metadata for roads. Defaults to False. TYPE: `bool` DEFAULT: `False`
`osm_way_tags`	Dict of tags to take into consideration during computing. Defaults to constants.OSM_WAY_TAGS. TYPE: `dict[str, list[str]]` DEFAULT: `OSM_WAY_TAGS`

Source code in srai/loaders/osm_way_loader/osm_way_loader.py

def __init__(
    self,
    network_type: Union[OSMNetworkType, str],
    contain_within_area: bool = False,
    preprocess: bool = True,
    wide: bool = True,
    metadata: bool = False,
    osm_way_tags: dict[str, list[str]] = constants.OSM_WAY_TAGS,
) -> None:
    """
    Init OSMWayLoader.

    Args:
        network_type (Union[OSMNetworkType, str]):
            Type of the network to download.
        contain_within_area (bool): defaults to False
            Whether to remove the roads that have one of their nodes outside of the given area.
        preprocess (bool): defaults to True
            Whether to preprocess the data.
        wide (bool): defaults to True
            Whether to return the roads in wide format.
        metadata (bool): defaults to False
            Whether to return metadata for roads.
        osm_way_tags (dict[str, list[str]]): defaults to constants.OSM_WAY_TAGS
            Dict of tags to take into consideration during computing.
            Maps a tag key to the list of its values. The dict is only read, never modified.
    """
    import_optional_dependencies(dependency_group="osm", modules=["osmnx"])

    self.network_type = network_type
    self.contain_within_area = contain_within_area
    self.preprocess = preprocess
    self.wide = wide
    self.metadata = metadata
    self.osm_keys = list(osm_way_tags.keys())
    # Flat, de-duplicated list of "<key>-<value>" names. The `oneway` key is the
    # exception and contributes just its own name. It is compared by equality:
    # `key in ("oneway")` would be a substring test on a plain string and would
    # also match keys such as "one" or "way".
    self.osm_tags_flat = (
        seq(osm_way_tags.items())
        .flat_map(
            lambda item: [
                item[0] if item[0] == "oneway" else f"{item[0]}-{value}" for value in item[1]
            ]
        )
        .distinct()
        .to_list()
    )

load(area)

¶

Load road infrastructure for a given GeoDataFrame.

PARAMETER	DESCRIPTION
`area`	(Multi)Polygons for which to download road infrastructure data. TYPE: `GeoDataFrame`

RAISES	DESCRIPTION
`ValueError`	If provided GeoDataFrame has no crs defined.
`ValueError`	If provided GeoDataFrame is empty.
`TypeError`	If provided geometries are not of type Polygon or MultiPolygon.
`LoadedDataIsEmptyException`	If none of the supplied area polygons contains any road infrastructure data.

RETURNS	DESCRIPTION
`tuple[GeoDataFrame, GeoDataFrame]`	Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: Road infrastructure as (intersections, roads)

Source code in srai/loaders/osm_way_loader/osm_way_loader.py

def load(self, area: gpd.GeoDataFrame) -> tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]:
    """
    Load road infrastructure for a given GeoDataFrame.

    The area is reprojected to WGS84, the road graph is downloaded and returned
    as a pair of GeoDataFrames. Edges are optionally preprocessed and converted
    to the wide format, depending on the `preprocess` and `wide` settings
    of the loader.

    Args:
        area (gpd.GeoDataFrame): (Multi)Polygons for which to download road infrastructure data.

    Raises:
        ValueError: If provided GeoDataFrame has no crs defined.
        ValueError: If provided GeoDataFrame is empty.
        TypeError: If provided geometries are not of type Polygon or MultiPolygon.
        LoadedDataIsEmptyException: If none of the supplied area polygons contains
            any road infrastructure data.

    Returns:
        Tuple[gpd.GeoDataFrame, gpd.GeoDataFrame]: Road infrastructure as (intersections, roads)
    """
    # Imported lazily because osmnx is an optional dependency.
    import osmnx as ox

    # Configure osmnx (module-wide settings) before any download is issued.
    ox.settings.useful_tags_way = constants.OSMNX_WAY_KEYS
    ox.settings.timeout = constants.OSMNX_TIMEOUT

    if area.empty:
        raise ValueError("Provided `area` GeoDataFrame is empty.")

    gdf_wgs84 = area.to_crs(crs=WGS84_CRS)

    gdf_nodes_raw, gdf_edges_raw = self._graph_from_gdf(gdf_wgs84)
    # Both frames must contain data; check nodes AND edges (the previous
    # condition tested the edges frame twice and never looked at the nodes).
    if gdf_nodes_raw.empty or gdf_edges_raw.empty:
        raise LoadedDataIsEmptyException(
            "It can happen when there is no road infrastructure in the given area."
        )

    gdf_edges = self._explode_cols(gdf_edges_raw)

    if self.preprocess:
        gdf_edges = self._preprocess(gdf_edges)

    if self.wide:
        # The wide conversion needs both the raw edges and the exploded ones.
        gdf_edges = self._to_wide(gdf_edges_raw, gdf_edges)

    gdf_edges = self._unify_index_and_columns_names(gdf_edges)

    return gdf_nodes_raw, gdf_edges

download_file(
    url, fname, chunk_size=1024, force_download=True
)

¶

Download a file with progress bar.

PARAMETER	DESCRIPTION
`url`	URL to download. TYPE: `str`
`fname`	File name. TYPE: `str`
`chunk_size`	Chunk size. TYPE: `int` DEFAULT: `1024`
`force_download`	Flag to force download even if file exists. TYPE: `bool` DEFAULT: `True`

Source: https://gist.github.com/yanqd0/c13ed29e29432e3cf3e7c38467f42f51

Source code in srai/loaders/download.py

def download_file(
    url: str, fname: str, chunk_size: int = 1024, force_download: bool = True
) -> None:
    """
    Download a file with progress bar.

    The response body is streamed to disk in chunks, so the whole payload is never
    held in memory. Missing parent directories of `fname` are created.

    Args:
        url (str): URL to download.
        fname (str): File name (path) under which the downloaded content is saved.
        chunk_size (int): Chunk size (in bytes) used when streaming the response.
        force_download (bool): Flag to force download even if file exists.

    Raises:
        requests.HTTPError: If the server responds with an error status code.

    Source: https://gist.github.com/yanqd0/c13ed29e29432e3cf3e7c38467f42f51
    """
    target = Path(fname)
    if target.exists() and not force_download:
        # stacklevel=2 attributes the warning to the caller, not to this helper.
        warnings.warn("File exists. Skipping download.", stacklevel=2)
        return

    target.parent.mkdir(parents=True, exist_ok=True)
    # Using the response as a context manager guarantees the streamed connection
    # is released even when writing fails midway.
    with requests.get(
        url,
        headers={"User-Agent": "SRAI Python package (https://github.com/kraina-ai/srai)"},
        stream=True,
    ) as resp:
        resp.raise_for_status()
        # A missing content-length header yields total=0 for the progress bar.
        total = int(resp.headers.get("content-length", 0))
        with (
            open(fname, "wb") as file,
            tqdm(
                # Path.name is separator-agnostic, unlike splitting on "/".
                desc=target.name,
                total=total,
                unit="iB",
                unit_scale=True,
                unit_divisor=1024,
            ) as bar,
        ):
            for data in resp.iter_content(chunk_size=chunk_size):
                size = file.write(data)
                bar.update(size)

Index

`Loader`
¶

`load(*args, **kwargs)`
¶

`GeoparquetLoader`
¶

`load(file_path, index_column=None, columns=None, area=None)`
¶

`GTFSLoader()`
¶

`load( gtfs_file, fail_on_validation_errors=True, skip_validation=False, )`
¶

`OSMLoader`
¶

`load(area, tags)`
¶

`OSMOnlineLoader()`
¶

`load(area, tags)`
¶

`OSMPbfLoader( pbf_file=None, download_source="geofabrik", download_directory="files", )`
¶

`load( area, tags, ignore_cache=False, explode_tags=True, keep_all_tags=False, )`
¶

`load_to_geoparquet( area, tags, ignore_cache=False, explode_tags=True, keep_all_tags=False, )`
¶

`OSMTileLoader( tile_server_url, zoom, verbose=False, resource_type="png", auth_token=None, data_collector=None, storage_path=None, )`
¶

`load(area)`
¶

`get_tile_by_x_y(x, y, idx=None)`
¶

`OSMNetworkType`
¶

`OSMWayLoader( network_type, contain_within_area=False, preprocess=True, wide=True, metadata=False, osm_way_tags=constants.OSM_WAY_TAGS, )`
¶

`load(area)`
¶

`download_file( url, fname, chunk_size=1024, force_download=True )`
¶

Index

Loader ¶

load(*args, **kwargs) ¶

GeoparquetLoader ¶

load(file_path, index_column=None, columns=None, area=None) ¶

GTFSLoader() ¶

load( gtfs_file, fail_on_validation_errors=True, skip_validation=False, ) ¶

OSMLoader ¶

load(area, tags) ¶

OSMOnlineLoader() ¶

load(area, tags) ¶

OSMPbfLoader( pbf_file=None, download_source="geofabrik", download_directory="files", ) ¶

load( area, tags, ignore_cache=False, explode_tags=True, keep_all_tags=False, ) ¶

load_to_geoparquet( area, tags, ignore_cache=False, explode_tags=True, keep_all_tags=False, ) ¶

OSMTileLoader( tile_server_url, zoom, verbose=False, resource_type="png", auth_token=None, data_collector=None, storage_path=None, ) ¶

load(area) ¶

get_tile_by_x_y(x, y, idx=None) ¶

OSMNetworkType ¶

OSMWayLoader( network_type, contain_within_area=False, preprocess=True, wide=True, metadata=False, osm_way_tags=constants.OSM_WAY_TAGS, ) ¶

load(area) ¶

download_file( url, fname, chunk_size=1024, force_download=True ) ¶

`Loader`
¶

`load(*args, **kwargs)`
¶

`GeoparquetLoader`
¶

`load(file_path, index_column=None, columns=None, area=None)`
¶

`GTFSLoader()`
¶

`load( gtfs_file, fail_on_validation_errors=True, skip_validation=False, )`
¶

`OSMLoader`
¶

`load(area, tags)`
¶

`OSMOnlineLoader()`
¶

`load(area, tags)`
¶

`OSMPbfLoader( pbf_file=None, download_source="geofabrik", download_directory="files", )`
¶

`load( area, tags, ignore_cache=False, explode_tags=True, keep_all_tags=False, )`
¶

`load_to_geoparquet( area, tags, ignore_cache=False, explode_tags=True, keep_all_tags=False, )`
¶

`OSMTileLoader( tile_server_url, zoom, verbose=False, resource_type="png", auth_token=None, data_collector=None, storage_path=None, )`
¶

`load(area)`
¶

`get_tile_by_x_y(x, y, idx=None)`
¶

`OSMNetworkType`
¶

`OSMWayLoader( network_type, contain_within_area=False, preprocess=True, wide=True, metadata=False, osm_way_tags=constants.OSM_WAY_TAGS, )`
¶

`load(area)`
¶

`download_file( url, fname, chunk_size=1024, force_download=True )`
¶