Skip to content

Index

OpenStreetMap extracts.

This module contains iterators for publicly available OpenStreetMap *.osm.pbf file repositories.

OsmExtractSource

Bases: str, Enum

Enum of available OSM extract sources.

download_extracts_pbf_files(
    extracts, download_directory, progressbar=True
)

Download OSM extracts as PBF files.

PARAMETER DESCRIPTION
extracts

List of extracts to download.

TYPE: list[OpenStreetMapExtract]

download_directory

Directory where PBF files should be saved.

TYPE: Path

progressbar

Show progress bar. Defaults to True.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
list[Path]

list[Path]: List of downloaded file paths.

Source code in quackosm/osm_extracts/__init__.py
def download_extracts_pbf_files(
    extracts: list[OpenStreetMapExtract], download_directory: Path, progressbar: bool = True
) -> list[Path]:
    """
    Fetch the PBF file of every given OSM extract.

    Each extract is retrieved from its `url` and stored inside the download directory
    under the name `<file_name>.osm.pbf`.

    Args:
        extracts (list[OpenStreetMapExtract]): Extracts that should be downloaded.
        download_directory (Path): Directory in which the PBF files are saved.
        progressbar (bool, optional): Show progress bar. It is only shown when
            `FORCE_TERMINAL` is falsy. Defaults to True.

    Returns:
        list[Path]: Paths of the downloaded files, in the same order as `extracts`.
    """
    # Only warnings and errors of the download logger should be emitted.
    pooch_logger = get_pooch_logger()
    pooch_logger.setLevel("WARNING")

    return [
        Path(
            retrieve(
                extract.url,
                fname=f"{extract.file_name}.osm.pbf",
                path=download_directory,
                progressbar=progressbar and not FORCE_TERMINAL,
                known_hash=None,
            )
        )
        for extract in extracts
    ]

get_extract_by_query(query, source='any')

Find an OSM extract by name.

PARAMETER DESCRIPTION
query

Query to search for a particular extract.

TYPE: str

source

OSM source name. Can be one of: 'any', 'Geofabrik', 'BBBike', 'OSM_fr'. Defaults to 'any'.

TYPE: Union[OsmExtractSource, str] DEFAULT: 'any'

RETURNS DESCRIPTION
OpenStreetMapExtract

Found extract.

TYPE: OpenStreetMapExtract

Source code in quackosm/osm_extracts/__init__.py
def get_extract_by_query(
    query: str, source: Union[OsmExtractSource, str] = "any"
) -> OpenStreetMapExtract:
    """
    Find an OSM extract by name.

    The query is compared with the `file_name` and `name` columns of the source index.
    The comparison is case-insensitive, ignores surrounding whitespace of the query and
    treats underscores and spaces as equal. A unique full file name match wins over
    a unique name match.

    Args:
        query (str): Query to search for a particular extract.
        source (Union[OsmExtractSource, str]): OSM source name. Can be one of: 'any', 'Geofabrik',
            'BBBike', 'OSM_fr'. Defaults to 'any'.

    Returns:
        OpenStreetMapExtract: Found extract.

    Raises:
        ValueError: If provided source value cannot be parsed to OsmExtractSource.
        OsmExtractMultipleMatchesError: If more than one extract name matches the query and
            no single full file name does. Matching full names are attached to the error.
        OsmExtractZeroMatchesError: If nothing matches the query. Full names of extracts
            with names close to the query (if any) are attached to the error.
    """
    # Guard only the enum conversion, so that errors raised while loading or searching
    # the index are not misreported as an unknown source.
    try:
        source_enum = OsmExtractSource(source)
    except ValueError as ex:
        raise ValueError(f"Unknown OSM extracts source: {source}.") from ex

    index = OSM_EXTRACT_SOURCE_INDEX_FUNCTION.get(source_enum, _get_combined_index)()

    lowered_query = query.lower()
    exact_query = lowered_query.strip()
    # Variant of the query where underscores are treated as spaces.
    spaced_query = lowered_query.replace("_", " ").strip()

    file_name_matched_rows = (index["file_name"].str.lower() == exact_query) | (
        index["file_name"].str.replace("_", " ").str.lower() == spaced_query
    )
    extract_name_matched_rows = (index["name"].str.lower() == exact_query) | (
        index["name"].str.replace("_", " ").str.lower() == spaced_query
    )

    # full file name matched
    if file_name_matched_rows.sum() == 1:
        matching_index_row = index[file_name_matched_rows].iloc[0]
    # single name matched
    elif extract_name_matched_rows.sum() == 1:
        matching_index_row = index[extract_name_matched_rows].iloc[0]
    # multiple names matched
    elif extract_name_matched_rows.any():
        matching_full_names = sorted(index[extract_name_matched_rows]["file_name"])
        full_names = ", ".join(f'"{full_name}"' for full_name in matching_full_names)

        raise OsmExtractMultipleMatchesError(
            f'Multiple extracts matched by query "{query.strip()}".\n'
            f"Matching extracts full names: {full_names}.",
            matching_full_names=matching_full_names,
        )
    # zero names matched
    else:
        matching_full_names = []
        # Up to 5 lower-cased extract names similar to the query.
        suggested_query_names = difflib.get_close_matches(
            lowered_query, index["name"].str.lower().unique(), n=5, cutoff=0.7
        )

        if suggested_query_names:
            for suggested_query_name in suggested_query_names:
                found_extracts = index[index["name"].str.lower() == suggested_query_name]
                matching_full_names.extend(found_extracts["file_name"])
            full_names = ", ".join(f'"{full_name}"' for full_name in matching_full_names)
            exception_message = (
                f'Zero extracts matched by query "{query}".\n'
                f"Found full names close to query: {full_names}."
            )
        else:
            exception_message = (
                f'Zero extracts matched by query "{query}".\n'
                "Zero close matches have been found."
            )

        raise OsmExtractZeroMatchesError(
            exception_message,
            matching_full_names=matching_full_names,
        )

    return OpenStreetMapExtract(
        id=matching_index_row["id"],
        name=matching_index_row["name"],
        parent=matching_index_row["parent"],
        url=matching_index_row["url"],
        geometry=matching_index_row["geometry"],
        file_name=matching_index_row["file_name"],
    )

download_extract_by_query(
    query,
    source="any",
    download_directory="files",
    progressbar=True,
)

Download an OSM extract by name.

PARAMETER DESCRIPTION
query

Query to search for a particular extract.

TYPE: str

source

OSM source name. Can be one of: 'any', 'Geofabrik', 'BBBike', 'OSM_fr'. Defaults to 'any'.

TYPE: Union[OsmExtractSource, str] DEFAULT: 'any'

download_directory

Directory where the file should be downloaded. Defaults to "files".

TYPE: Union[str, Path] DEFAULT: 'files'

progressbar

Show progress bar. Defaults to True.

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
Path

Path to the downloaded OSM extract.

TYPE: Path

Source code in quackosm/osm_extracts/__init__.py
def download_extract_by_query(
    query: str,
    source: Union[OsmExtractSource, str] = "any",
    download_directory: Union[str, Path] = "files",
    progressbar: bool = True,
) -> Path:
    """
    Look up a single OSM extract by name and download its PBF file.

    The extract is resolved with `get_extract_by_query` and then fetched with
    `download_extracts_pbf_files`.

    Args:
        query (str): Query to search for a particular extract.
        source (Union[OsmExtractSource, str]): OSM source name. Can be one of: 'any', 'Geofabrik',
            'BBBike', 'OSM_fr'. Defaults to 'any'.
        download_directory (Union[str, Path], optional): Directory where the file should be
            downloaded. Defaults to "files".
        progressbar (bool, optional): Show progress bar. Defaults to True.

    Returns:
        Path: Path to the downloaded OSM extract.
    """
    found_extract = get_extract_by_query(query, source)
    target_directory = Path(download_directory)
    # A single extract is requested, so exactly one path is expected back.
    downloaded_paths = download_extracts_pbf_files(
        [found_extract], target_directory, progressbar
    )
    return downloaded_paths[0]

display_available_extracts(
    source, use_full_names=True, use_pager=False
)

Display all available OSM extracts in the form of a tree.

Output will be printed to the console.

PARAMETER DESCRIPTION
source

Source for which extracts should be displayed.

TYPE: Union[OsmExtractSource, str]

use_full_names

Whether to display full name, or short name of the extract. Full name contains all parents of the extract. Defaults to True.

TYPE: bool DEFAULT: True

use_pager

Whether to display long output using Rich pager or just print to output. Defaults to False.

TYPE: bool DEFAULT: False

RAISES DESCRIPTION
ValueError

If provided source value cannot be parsed to OsmExtractSource.

Source code in quackosm/osm_extracts/__init__.py
def display_available_extracts(
    source: Union[OsmExtractSource, str],
    use_full_names: bool = True,
    use_pager: bool = False,
) -> None:
    """
    Display all available OSM extracts in the form of a tree.

    Output will be printed to the console.

    Args:
        source (Union[OsmExtractSource, str]): Source for which extracts should be displayed.
        use_full_names (bool, optional): Whether to display full name, or short name of the extract.
            Full name contains all parents of the extract. Defaults to `True`.
        use_pager (bool, optional): Whether to display long output using Rich pager
            or just print to output. Defaults to `False`.

    Raises:
        ValueError: If provided source value cannot be parsed to OsmExtractSource.
    """
    # Guard only the enum conversion, so that a ValueError raised while building or
    # printing the tree is not misreported as an unknown source.
    try:
        source_enum = OsmExtractSource(source)
    except ValueError as ex:
        raise ValueError(f"Unknown OSM extracts source: {source}.") from ex

    tree = get_available_extracts_as_rich_tree(
        source_enum, OSM_EXTRACT_SOURCE_INDEX_FUNCTION, use_full_names
    )
    if not use_pager:
        rprint(tree)
    else:
        console = get_console()
        with console.pager():
            console.print(tree)

find_smallest_containing_extracts_total(
    geometry,
    geometry_coverage_iou_threshold=0.01,
    allow_uncovered_geometry=False,
)

Find smallest extracts from all OSM extract indexes that contains given polygon.

Iterates all indexes and finds smallest extracts that covers a given geometry.

Extracts are selected based on the highest value of the Intersection over Union metric with geometry. Some extracts might be discarded because of low IoU metric value leaving some parts of the geometry uncovered.

PARAMETER DESCRIPTION
geometry

Geometry to be covered.

TYPE: Union[BaseGeometry, BaseMultipartGeometry]

geometry_coverage_iou_threshold

Minimal value of the Intersection over Union metric for selecting the matching OSM extracts. If the best matching extract has a value lower than the threshold, it is discarded (except the first one). Has to be in range between 0 and 1. Value of 0 will allow every intersected extract, value of 1 will only allow extracts that match the geometry exactly. Defaults to 0.01.

TYPE: float DEFAULT: 0.01

allow_uncovered_geometry

Suppress an error if some geometry parts aren't covered by any OSM extract. Defaults to False.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[OpenStreetMapExtract]

List[OpenStreetMapExtract]: List of extracts name, URL to download it and boundary polygon.

Source code in quackosm/osm_extracts/__init__.py
def find_smallest_containing_extracts_total(
    geometry: Union[BaseGeometry, BaseMultipartGeometry],
    geometry_coverage_iou_threshold: float = 0.01,
    allow_uncovered_geometry: bool = False,
) -> list[OpenStreetMapExtract]:
    """
    Find the smallest extracts covering a geometry across all OSM extract indexes.

    The search runs on the combined index of all sources.

    Extracts are chosen by the highest Intersection over Union (IoU) value with the geometry.
    Extracts with a low IoU value might be discarded, which can leave some parts
    of the geometry uncovered.

    Args:
        geometry (Union[BaseGeometry, BaseMultipartGeometry]): Geometry to be covered.
        geometry_coverage_iou_threshold (float): Minimal value of the Intersection over Union metric
            for selecting the matching OSM extracts. If the best matching extract has a value lower
            than the threshold, it is discarded (except the first one). Has to be in range between
            0 and 1. Value of 0 will allow every intersected extract, value of 1 will only allow
            extracts that match the geometry exactly. Defaults to 0.01.
        allow_uncovered_geometry (bool): Suppress an error if some geometry parts aren't covered
            by any OSM extract. Defaults to `False`.

    Returns:
        list[OpenStreetMapExtract]: Matching extracts, each with its name, download URL
            and boundary polygon.
    """
    combined_index = _get_combined_index()
    return _find_smallest_containing_extracts(
        geometry=geometry,
        polygons_index_gdf=combined_index,
        geometry_coverage_iou_threshold=geometry_coverage_iou_threshold,
        allow_uncovered_geometry=allow_uncovered_geometry,
    )

find_smallest_containing_geofabrik_extracts(
    geometry,
    geometry_coverage_iou_threshold=0.01,
    allow_uncovered_geometry=False,
)

Find smallest extracts from Geofabrik that contains given geometry.

Iterates a geofabrik index and finds smallest extracts that covers a given geometry.

Extracts are selected based on the highest value of the Intersection over Union metric with geometry. Some extracts might be discarded because of low IoU metric value leaving some parts of the geometry uncovered.

PARAMETER DESCRIPTION
geometry

Geometry to be covered.

TYPE: Union[BaseGeometry, BaseMultipartGeometry]

geometry_coverage_iou_threshold

Minimal value of the Intersection over Union metric for selecting the matching OSM extracts. If the best matching extract has a value lower than the threshold, it is discarded (except the first one). Has to be in range between 0 and 1. Value of 0 will allow every intersected extract, value of 1 will only allow extracts that match the geometry exactly. Defaults to 0.01.

TYPE: float DEFAULT: 0.01

allow_uncovered_geometry

Suppress an error if some geometry parts aren't covered by any OSM extract. Defaults to False.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[OpenStreetMapExtract]

List[OpenStreetMapExtract]: List of extracts name, URL to download it and boundary polygon.

Source code in quackosm/osm_extracts/__init__.py
def find_smallest_containing_geofabrik_extracts(
    geometry: Union[BaseGeometry, BaseMultipartGeometry],
    geometry_coverage_iou_threshold: float = 0.01,
    allow_uncovered_geometry: bool = False,
) -> list[OpenStreetMapExtract]:
    """
    Find the smallest Geofabrik extracts covering a geometry.

    The search runs on the index registered for the Geofabrik source.

    Extracts are chosen by the highest Intersection over Union (IoU) value with the geometry.
    Extracts with a low IoU value might be discarded, which can leave some parts
    of the geometry uncovered.

    Args:
        geometry (Union[BaseGeometry, BaseMultipartGeometry]): Geometry to be covered.
        geometry_coverage_iou_threshold (float): Minimal value of the Intersection over Union metric
            for selecting the matching OSM extracts. If the best matching extract has a value lower
            than the threshold, it is discarded (except the first one). Has to be in range between
            0 and 1. Value of 0 will allow every intersected extract, value of 1 will only allow
            extracts that match the geometry exactly. Defaults to 0.01.
        allow_uncovered_geometry (bool): Suppress an error if some geometry parts aren't covered
            by any OSM extract. Defaults to `False`.

    Returns:
        list[OpenStreetMapExtract]: Matching extracts, each with its name, download URL
            and boundary polygon.
    """
    load_geofabrik_index = OSM_EXTRACT_SOURCE_INDEX_FUNCTION[OsmExtractSource.geofabrik]
    geofabrik_index = load_geofabrik_index()
    return _find_smallest_containing_extracts(
        geometry=geometry,
        polygons_index_gdf=geofabrik_index,
        geometry_coverage_iou_threshold=geometry_coverage_iou_threshold,
        allow_uncovered_geometry=allow_uncovered_geometry,
    )

find_smallest_containing_openstreetmap_fr_extracts(
    geometry,
    geometry_coverage_iou_threshold=0.01,
    allow_uncovered_geometry=False,
)

Find smallest extracts from OpenStreetMap.fr that contains given polygon.

Iterates an osm.fr index and finds smallest extracts that covers a given geometry.

Extracts are selected based on the highest value of the Intersection over Union metric with geometry. Some extracts might be discarded because of low IoU metric value leaving some parts of the geometry uncovered.

PARAMETER DESCRIPTION
geometry

Geometry to be covered.

TYPE: Union[BaseGeometry, BaseMultipartGeometry]

geometry_coverage_iou_threshold

Minimal value of the Intersection over Union metric for selecting the matching OSM extracts. If the best matching extract has a value lower than the threshold, it is discarded (except the first one). Has to be in range between 0 and 1. Value of 0 will allow every intersected extract, value of 1 will only allow extracts that match the geometry exactly. Defaults to 0.01.

TYPE: float DEFAULT: 0.01

allow_uncovered_geometry

Suppress an error if some geometry parts aren't covered by any OSM extract. Defaults to False.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[OpenStreetMapExtract]

List[OpenStreetMapExtract]: List of extracts name, URL to download it and boundary polygon.

Source code in quackosm/osm_extracts/__init__.py
def find_smallest_containing_openstreetmap_fr_extracts(
    geometry: Union[BaseGeometry, BaseMultipartGeometry],
    geometry_coverage_iou_threshold: float = 0.01,
    allow_uncovered_geometry: bool = False,
) -> list[OpenStreetMapExtract]:
    """
    Find the smallest OpenStreetMap.fr extracts covering a geometry.

    The search runs on the index registered for the osm.fr source.

    Extracts are chosen by the highest Intersection over Union (IoU) value with the geometry.
    Extracts with a low IoU value might be discarded, which can leave some parts
    of the geometry uncovered.

    Args:
        geometry (Union[BaseGeometry, BaseMultipartGeometry]): Geometry to be covered.
        geometry_coverage_iou_threshold (float): Minimal value of the Intersection over Union metric
            for selecting the matching OSM extracts. If the best matching extract has a value lower
            than the threshold, it is discarded (except the first one). Has to be in range between
            0 and 1. Value of 0 will allow every intersected extract, value of 1 will only allow
            extracts that match the geometry exactly. Defaults to 0.01.
        allow_uncovered_geometry (bool): Suppress an error if some geometry parts aren't covered
            by any OSM extract. Defaults to `False`.

    Returns:
        list[OpenStreetMapExtract]: Matching extracts, each with its name, download URL
            and boundary polygon.
    """
    load_osm_fr_index = OSM_EXTRACT_SOURCE_INDEX_FUNCTION[OsmExtractSource.osm_fr]
    osm_fr_index = load_osm_fr_index()
    return _find_smallest_containing_extracts(
        geometry=geometry,
        polygons_index_gdf=osm_fr_index,
        geometry_coverage_iou_threshold=geometry_coverage_iou_threshold,
        allow_uncovered_geometry=allow_uncovered_geometry,
    )

find_smallest_containing_bbbike_extracts(
    geometry,
    geometry_coverage_iou_threshold=0.01,
    allow_uncovered_geometry=False,
)

Find smallest extracts from BBBike that contains given polygon.

Iterates a BBBike index and finds smallest extracts that covers a given geometry.

Extracts are selected based on the highest value of the Intersection over Union metric with geometry. Some extracts might be discarded because of low IoU metric value leaving some parts of the geometry uncovered.

PARAMETER DESCRIPTION
geometry

Geometry to be covered.

TYPE: Union[BaseGeometry, BaseMultipartGeometry]

geometry_coverage_iou_threshold

Minimal value of the Intersection over Union metric for selecting the matching OSM extracts. If the best matching extract has a value lower than the threshold, it is discarded (except the first one). Has to be in range between 0 and 1. Value of 0 will allow every intersected extract, value of 1 will only allow extracts that match the geometry exactly. Defaults to 0.01.

TYPE: float DEFAULT: 0.01

allow_uncovered_geometry

Suppress an error if some geometry parts aren't covered by any OSM extract. Defaults to False.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[OpenStreetMapExtract]

List[OpenStreetMapExtract]: List of extracts name, URL to download it and boundary polygon.

Source code in quackosm/osm_extracts/__init__.py
def find_smallest_containing_bbbike_extracts(
    geometry: Union[BaseGeometry, BaseMultipartGeometry],
    geometry_coverage_iou_threshold: float = 0.01,
    allow_uncovered_geometry: bool = False,
) -> list[OpenStreetMapExtract]:
    """
    Find the smallest BBBike extracts covering a geometry.

    The search runs on the index registered for the BBBike source.

    Extracts are chosen by the highest Intersection over Union (IoU) value with the geometry.
    Extracts with a low IoU value might be discarded, which can leave some parts
    of the geometry uncovered.

    Args:
        geometry (Union[BaseGeometry, BaseMultipartGeometry]): Geometry to be covered.
        geometry_coverage_iou_threshold (float): Minimal value of the Intersection over Union metric
            for selecting the matching OSM extracts. If the best matching extract has a value lower
            than the threshold, it is discarded (except the first one). Has to be in range between
            0 and 1. Value of 0 will allow every intersected extract, value of 1 will only allow
            extracts that match the geometry exactly. Defaults to 0.01.
        allow_uncovered_geometry (bool): Suppress an error if some geometry parts aren't covered
            by any OSM extract. Defaults to `False`.

    Returns:
        list[OpenStreetMapExtract]: Matching extracts, each with its name, download URL
            and boundary polygon.
    """
    load_bbbike_index = OSM_EXTRACT_SOURCE_INDEX_FUNCTION[OsmExtractSource.bbbike]
    bbbike_index = load_bbbike_index()
    return _find_smallest_containing_extracts(
        geometry=geometry,
        polygons_index_gdf=bbbike_index,
        geometry_coverage_iou_threshold=geometry_coverage_iou_threshold,
        allow_uncovered_geometry=allow_uncovered_geometry,
    )

find_smallest_containing_extracts(
    geometry,
    source,
    geometry_coverage_iou_threshold=0.01,
    allow_uncovered_geometry=False,
)

Find smallest extracts from a given OSM source that contains given polygon.

Iterates an OSM source index and finds smallest extracts that covers a given geometry.

Extracts are selected based on the highest value of the Intersection over Union metric with geometry. Some extracts might be discarded because of low IoU metric value leaving some parts of the geometry uncovered.

PARAMETER DESCRIPTION
geometry

Geometry to be covered.

TYPE: Union[BaseGeometry, BaseMultipartGeometry]

source

OSM source name. Can be one of: 'any', 'Geofabrik', 'BBBike', 'OSMfr'.

TYPE: Union[OsmExtractSource, str]

geometry_coverage_iou_threshold

Minimal value of the Intersection over Union metric for selecting the matching OSM extracts. If the best matching extract has a value lower than the threshold, it is discarded (except the first one). Has to be in range between 0 and 1. Value of 0 will allow every intersected extract, value of 1 will only allow extracts that match the geometry exactly. Defaults to 0.01.

TYPE: float DEFAULT: 0.01

allow_uncovered_geometry

Suppress an error if some geometry parts aren't covered by any OSM extract. Defaults to False.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
list[OpenStreetMapExtract]

List[OpenStreetMapExtract]: List of extracts name, URL to download it and boundary polygon.

Source code in quackosm/osm_extracts/__init__.py
def find_smallest_containing_extracts(
    geometry: Union[BaseGeometry, BaseMultipartGeometry],
    source: Union[OsmExtractSource, str],
    geometry_coverage_iou_threshold: float = 0.01,
    allow_uncovered_geometry: bool = False,
) -> list[OpenStreetMapExtract]:
    """
    Find smallest extracts from a given OSM source that contains given polygon.

    Iterates an OSM source index and finds smallest extracts that covers a given geometry.
    When no index function is registered for the source, the combined index is used.

    Extracts are selected based on the highest value of the Intersection over Union metric with
    geometry. Some extracts might be discarded because of low IoU metric value leaving some parts
    of the geometry uncovered.

    Args:
        geometry (Union[BaseGeometry, BaseMultipartGeometry]): Geometry to be covered.
        source (Union[OsmExtractSource, str]): OSM source name. Can be one of: 'any', 'Geofabrik',
            'BBBike', 'OSMfr'.
        geometry_coverage_iou_threshold (float): Minimal value of the Intersection over Union metric
            for selecting the matching OSM extracts. If the best matching extract has a value lower
            than the threshold, it is discarded (except the first one). Has to be in range between
            0 and 1. Value of 0 will allow every intersected extract, value of 1 will only allow
            extracts that match the geometry exactly. Defaults to 0.01.
        allow_uncovered_geometry (bool): Suppress an error if some geometry parts aren't covered
            by any OSM extract. Defaults to `False`.

    Returns:
        list[OpenStreetMapExtract]: List of extracts name, URL to download it and boundary polygon.

    Raises:
        ValueError: If provided source value cannot be parsed to OsmExtractSource.
    """
    # Guard only the enum conversion, so that a ValueError raised while loading the index
    # or searching for extracts is not misreported as an unknown source.
    try:
        source_enum = OsmExtractSource(source)
    except ValueError as ex:
        raise ValueError(f"Unknown OSM extracts source: {source}.") from ex

    index = OSM_EXTRACT_SOURCE_INDEX_FUNCTION.get(source_enum, _get_combined_index)()
    return _find_smallest_containing_extracts(
        geometry=geometry,
        polygons_index_gdf=index,
        geometry_coverage_iou_threshold=geometry_coverage_iou_threshold,
        allow_uncovered_geometry=allow_uncovered_geometry,
    )