PBF File Reader¶
PBFFileReader
can really quickly parse full OSM extract in the form of *.osm.pbf
file.
It uses DuckDB
with spatial
extension to convert pbf
files into geoparquet
files without GDAL dependency.
Reader can filter objects by geometry and by OSM tags with option to split tags into columns or keep it as a single dictionary.
Caching strategy is implemented to reduce computations, but it can be overriden using ignore_cache
parameter.
Download all buildings in Reykjavík, Iceland¶
Filtering the data with geometry and by tags, with tags in exploded form
from quackosm import PbfFileReader
import urllib.request
import osmnx as ox
iceland_pbf_url = "https://download.geofabrik.de/europe/iceland-latest.osm.pbf"
iceland_pbf_file = "iceland.osm.pbf"
urllib.request.urlretrieve(iceland_pbf_url, iceland_pbf_file)
('iceland.osm.pbf', <http.client.HTTPMessage at 0x7f0b4a256c50>)
reykjavik_gdf = ox.geocode_to_gdf("Reykjavík, IS")
reykjavik_gdf
geometry | bbox_north | bbox_south | bbox_east | bbox_west | place_id | osm_type | osm_id | lat | lon | class | type | place_rank | importance | addresstype | name | display_name | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | MULTIPOLYGON (((-21.98383 64.15018, -21.98283 ... | 64.315371 | 64.040402 | -21.401999 | -21.98383 | 267702007 | relation | 2580605 | 64.145981 | -21.942237 | boundary | administrative | 11 | 0.618605 | city | Reykjavik | Reykjavik, Capital Region, Iceland |
reader = PbfFileReader(
geometry_filter=reykjavik_gdf.geometry.iloc[0], tags_filter={"building": True}
)
reykjavik_buildings_gpq = reader.convert_pbf_to_gpq("iceland.osm.pbf")
reykjavik_buildings_gpq
PosixPath('files/iceland_ae99e3d94b104c32f569e07bbca0718664a35b810b85c9d092c1e9221f26aa22_1d67f86a6950d4e3ac2ccd475ab10c3d276aeca67edae67c55719a0d7c96ca87_exploded.geoparquet')
Read those features using DuckDB¶
import duckdb
connection = duckdb.connect()
connection.load_extension("parquet")
connection.load_extension("spatial")
features_relation = connection.read_parquet(str(reykjavik_buildings_gpq)).project(
"* REPLACE (ST_GeomFromWKB(geometry) AS geometry)"
)
features_relation
┌─────────────────┬──────────────┬─────────────────────────────────────────────────────────────────────────────────────┐ │ feature_id │ building │ geometry │ │ varchar │ varchar │ geometry │ ├─────────────────┼──────────────┼─────────────────────────────────────────────────────────────────────────────────────┤ │ node/2710083164 │ yes │ POINT (-21.9140228 64.1452586) │ │ node/2975118984 │ yes │ POINT (-21.7832872 64.1364481) │ │ node/4715038060 │ construction │ POINT (-21.8992057 64.1397328) │ │ way/233272373 │ yes │ POLYGON ((-21.9065787 64.1265806, -21.9064947 64.1266802, -21.9063334 64.1266544,… │ │ way/233272374 │ yes │ POLYGON ((-21.9023826 64.1240191, -21.9023877 64.1239458, -21.9022557 64.123944, … │ │ way/233272375 │ yes │ POLYGON ((-21.9024202 64.1258865, -21.9022966 64.1258335, -21.9024978 64.1257399,… │ │ way/233272376 │ yes │ POLYGON ((-21.9011981 64.1236841, -21.9012224 64.1236745, -21.9012813 64.1237029,… │ │ way/233272377 │ yes │ POLYGON ((-21.9078715 64.1264468, -21.9077599 64.1263756, -21.9079738 64.1263128,… │ │ way/233272378 │ yes │ POLYGON ((-21.9027774 64.1238052, -21.9026257 64.1238656, -21.9027846 64.1239417,… │ │ way/233272379 │ yes │ POLYGON ((-21.9027369 64.1258564, -21.9026223 64.1258006, -21.9024202 64.1258865,… │ │ · │ · │ · │ │ · │ · │ · │ │ · │ · │ · │ │ way/127401873 │ yes │ POLYGON ((-21.9097157 64.1461758, -21.9098321 64.1462748, -21.9094854 64.1463524,… │ │ way/127401875 │ office │ POLYGON ((-21.9114765 64.145423, -21.9114859 64.1454551, -21.9118218 64.1454366, … │ │ way/127401898 │ warehouse │ POLYGON ((-21.9099763 64.1453441, -21.9103756 64.1453197, -21.910412 64.1454328, … │ │ way/127401899 │ yes │ POLYGON ((-21.9087455 64.1463662, -21.9084851 64.1463826, -21.9083621 64.1460115,… │ │ way/127401904 │ warehouse │ POLYGON ((-21.9107832 64.1457589, -21.9104962 64.1458712, -21.9102038 64.1457214,… │ │ way/127401916 │ yes │ POLYGON ((-21.9094892 64.1461884, -21.9095914 64.146162, -21.9096085 64.1461747, … │ │ way/127401919 │ yes │ POLYGON ((-21.9096293 64.1461229, -21.9096967 64.1461055, -21.9097183 64.1461214,… │ │ way/127401926 │ office │ POLYGON ((-21.9079247 64.1457773, -21.9078474 64.145543, -21.9078798 64.1455403, … │ │ way/127604466 │ office │ POLYGON ((-21.9123303 64.1450162, -21.912271 64.1448297, -21.9120645 64.1448422, … │ │ way/127604470 │ yes │ POLYGON ((-21.91076 64.1445659, -21.9110412 64.1445607, -21.9110489 64.1446407, -… │ ├─────────────────┴──────────────┴─────────────────────────────────────────────────────────────────────────────────────┤ │ ? rows (>9999 rows, 20 shown) 3 columns │ └──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
Count all buildings¶
features_relation.count("feature_id")
┌───────────────────┐ │ count(feature_id) │ │ int64 │ ├───────────────────┤ │ 25268 │ └───────────────────┘
Download main roads for Estonia¶
Filtering the data only by tags, with tags in exploded form
highways_filter = {
"highway": [
"motorway",
"trunk",
"primary",
"secondary",
"tertiary",
]
}
estonia_pbf_url = "http://download.geofabrik.de/europe/estonia-latest.osm.pbf"
estonia_pbf_file = "estonia.osm.pbf"
urllib.request.urlretrieve(estonia_pbf_url, estonia_pbf_file)
reader = PbfFileReader(geometry_filter=None, tags_filter=highways_filter)
estonia_features_gpq = reader.convert_pbf_to_gpq(estonia_pbf_file)
estonia_features_gpq
PosixPath('files/estonia_87e7b741e959b474170855fa123c243d4981d68b21ad4643272ecbeeafea35e2_noclip_exploded.geoparquet')
features_relation = connection.read_parquet(str(estonia_features_gpq)).project(
"* REPLACE (ST_GeomFromWKB(geometry) AS geometry)"
)
features_relation
┌───────────────┬───────────┬──────────────────────────────────────────────────────────────────────────────────────────┐ │ feature_id │ highway │ geometry │ │ varchar │ varchar │ geometry │ ├───────────────┼───────────┼──────────────────────────────────────────────────────────────────────────────────────────┤ │ way/45497890 │ tertiary │ LINESTRING (25.8024973 59.5414089, 25.8030749 59.5413915, 25.803579 59.5413527, 25.804… │ │ way/45497891 │ tertiary │ LINESTRING (25.7963532 59.5406013, 25.7968087 59.5405578) │ │ way/45497892 │ tertiary │ LINESTRING (25.7968087 59.5405578, 25.7973018 59.5405203, 25.7974484 59.5405173, 25.79… │ │ way/45497896 │ tertiary │ LINESTRING (25.7403603 59.5407345, 25.7398824 59.5407602) │ │ way/45497897 │ tertiary │ LINESTRING (25.7398824 59.5407602, 25.7395757 59.5407784, 25.7393804 59.54079, 25.7389… │ │ way/53017024 │ primary │ LINESTRING (25.5978112 58.9120426, 25.599252 58.9124536, 25.6010248 58.9130309, 25.601… │ │ way/53017025 │ tertiary │ LINESTRING (25.6439113 58.9013023, 25.6436925 58.9012195, 25.6432378 58.9010476, 25.64… │ │ way/53017026 │ trunk │ LINESTRING (25.641522 58.9001989, 25.6410228 58.9003803) │ │ way/53017027 │ trunk │ LINESTRING (25.6407748 58.9003111, 25.6413298 58.9001129) │ │ way/53017032 │ trunk │ LINESTRING (25.6108179 58.931609, 25.6106506 58.932234, 25.6105079 58.9327703, 25.6103… │ │ · │ · │ · │ │ · │ · │ · │ │ · │ · │ · │ │ way/481262015 │ trunk │ LINESTRING (24.2395472 59.3364629, 24.2387478 59.3365035, 24.2369661 59.3366019, 24.23… │ │ way/482445311 │ secondary │ LINESTRING (26.4005034 58.7550337, 26.4006201 58.7549803, 26.4007597 58.7549446, 26.40… │ │ way/484372693 │ tertiary │ LINESTRING (24.0361528 59.387179, 24.0360677 59.3870642, 24.0360197 59.3869995, 24.035… │ │ way/484372694 │ tertiary │ LINESTRING (24.0364655 59.3876149, 24.0362273 59.3872829, 24.0361528 59.387179) │ │ way/484372695 │ tertiary │ LINESTRING (24.0384195 59.3891733, 24.0383618 59.3891467, 24.0380824 59.3890027, 24.03… │ │ way/486935441 │ tertiary │ LINESTRING (24.3254447 59.1106452, 24.3262053 59.1109706, 24.3274424 59.1114149, 24.32… │ │ way/486953551 │ tertiary │ LINESTRING (24.1449307 59.1539196, 24.1453441 59.1530174, 24.1454556 59.152884, 24.145… │ │ way/486953559 │ tertiary │ LINESTRING (24.1487808 59.2235343, 24.1481138 59.2233856, 24.1473616 59.223183, 24.145… │ │ way/487939973 │ tertiary │ LINESTRING (27.3527569 58.3935379, 27.3528816 58.3934048, 27.3530586 58.3932454, 27.35… │ │ way/487967863 │ secondary │ LINESTRING (24.6876052 59.4108936, 24.6877084 59.4110042, 24.6878575 59.4111655, 24.68… │ ├───────────────┴───────────┴──────────────────────────────────────────────────────────────────────────────────────────┤ │ ? rows (>9999 rows, 20 shown) 3 columns │ └──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
Count loaded roads¶
features_relation.count("feature_id")
┌───────────────────┐ │ count(feature_id) │ │ int64 │ ├───────────────────┤ │ 24149 │ └───────────────────┘
length_in_meters = (
features_relation.project(
"ST_Length(ST_Transform(geometry, 'EPSG:4326', 'EPSG:3301')) AS road_length"
)
.sum("road_length")
.fetchone()[0]
)
length_in_km = length_in_meters / 1000
length_in_km
29518.648994282084
Plot the roads using GeoPandas¶
With fast loading of geoparquet files using geoarrow.pyarrow
library.
import geoarrow.pyarrow as ga
import quackosm._geo_arrow_io as io
from quackosm._constants import GEOMETRY_COLUMN
parquet_table = io.read_geoparquet_table(estonia_features_gpq)
ga.to_geopandas(parquet_table.column(GEOMETRY_COLUMN)).plot()
<Axes: >
Download all data for Liechtenstein¶
Without filtering, with tags in a compact form
liechtenstein_pbf_url = "https://download.geofabrik.de/europe/liechtenstein-latest.osm.pbf"
liechtenstein_pbf_file = "liechtenstein.osm.pbf"
urllib.request.urlretrieve(liechtenstein_pbf_url, liechtenstein_pbf_file)
# Here explode_tags is set to False explicitly,
# but it would set automatically when not filtering the data
reader = PbfFileReader(geometry_filter=None, tags_filter=None)
liechtenstein_features_gpq = reader.convert_pbf_to_gpq(
liechtenstein_pbf_file, explode_tags=False
)
liechtenstein_features_gpq
PosixPath('files/liechtenstein_nofilter_noclip_compact.geoparquet')
features_relation = connection.read_parquet(str(liechtenstein_features_gpq)).project(
"* REPLACE (ST_GeomFromWKB(geometry) AS geometry)"
)
features_relation
┌──────────────────┬────────────────────────────────────────────────────────────────────┬──────────────────────────────┐ │ feature_id │ tags │ geometry │ │ varchar │ map(varchar, varchar) │ geometry │ ├──────────────────┼────────────────────────────────────────────────────────────────────┼──────────────────────────────┤ │ node/10978448978 │ {drinking_water=yes, man_made=water_tap} │ POINT (9.5479009 47.2224384) │ │ node/10979644597 │ {amenity=vending_machine, bin=yes, fee=no, vending=excrement_bags} │ POINT (9.5470954 47.2275996) │ │ node/10979660209 │ {leisure=firepit} │ POINT (9.545326 47.226931) │ │ node/10979660210 │ {amenity=waste_basket, colour=#bababa, material=metal} │ POINT (9.5452633 47.226943) │ │ node/10979660211 │ {amenity=bench, backrest=no, material=wood} │ POINT (9.5452908 47.2269243) │ │ node/10979660212 │ {amenity=bench, backrest=no, material=wood} │ POINT (9.5453091 47.2269416) │ │ node/10979660213 │ {amenity=bench, backrest=no, material=wood} │ POINT (9.5453513 47.2269279) │ │ node/10981052696 │ {artist_name=Peter Hemmi, artwork_type=installation, name=Blühen… │ POINT (9.5206322 47.1349117) │ │ node/26863444 │ {name=Kuhgrat, natural=peak, wikidata=Q4244296, wikimedia_common… │ POINT (9.5608307 47.1666716) │ │ node/30603864 │ {crossing=uncontrolled, highway=crossing} │ POINT (9.5203855 47.1705163) │ │ · │ · │ · │ │ · │ · │ · │ │ · │ · │ · │ │ node/5140945681 │ {natural=tree} │ POINT (9.5247518 47.106252) │ │ node/5140945682 │ {natural=tree} │ POINT (9.5246942 47.1062487) │ │ node/5140945683 │ {natural=tree} │ POINT (9.5246222 47.1062433) │ │ node/5140945684 │ {natural=tree} │ POINT (9.5245694 47.1062313) │ │ node/5140945685 │ {natural=tree} │ POINT (9.524491 47.1062269) │ │ node/5140945686 │ {natural=tree} │ POINT (9.5244271 47.1062171) │ │ node/5140945687 │ {natural=tree} │ POINT (9.5243583 47.1062139) │ │ node/5140945688 │ {natural=tree} │ POINT (9.5243023 47.1062106) │ │ node/5140945689 │ {natural=tree} │ POINT (9.5271049 47.0871184) │ │ node/5140945690 │ {natural=tree} │ POINT (9.5271465 47.0872078) │ ├──────────────────┴────────────────────────────────────────────────────────────────────┴──────────────────────────────┤ │ ? rows (>9999 rows, 20 shown) 3 columns │ └──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
Return data as GeoDataFrame¶
PbfFileReader
can also return the data in the GeoDataFrame form.
Here the caching strategy will be utilized - file won't be transformed again.
features_gdf = reader.get_features_gdf(liechtenstein_pbf_file)
features_gdf
tags | geometry | |
---|---|---|
feature_id | ||
node/10978448978 | {'drinking_water': 'yes', 'man_made': 'water_t... | POINT (9.54790 47.22244) |
node/10979644597 | {'amenity': 'vending_machine', 'bin': 'yes', '... | POINT (9.54710 47.22760) |
node/10979660209 | {'leisure': 'firepit'} | POINT (9.54533 47.22693) |
node/10979660210 | {'amenity': 'waste_basket', 'colour': '#bababa... | POINT (9.54526 47.22694) |
node/10979660211 | {'amenity': 'bench', 'backrest': 'no', 'materi... | POINT (9.54529 47.22692) |
... | ... | ... |
relation/6716202 | {'landuse': 'farmland', 'type': 'multipolygon'} | POLYGON ((9.55418 47.21403, 9.55641 47.21556, ... |
relation/9381951 | {'landuse': 'farmland', 'type': 'multipolygon'} | POLYGON ((9.50008 47.05989, 9.49992 47.06037, ... |
relation/3990462 | {'addr:city': 'Schaan', 'addr:country': 'LI', ... | POLYGON ((9.50840 47.16832, 9.50864 47.16852, ... |
relation/6535868 | {'natural': 'scree', 'type': 'multipolygon'} | POLYGON ((9.56933 47.05600, 9.56874 47.05609, ... |
relation/9403773 | {'landuse': 'forest', 'type': 'multipolygon'} | POLYGON ((9.52016 47.08864, 9.52082 47.08958, ... |
50983 rows × 2 columns
Plot the forests using GeoPandas¶
Filter all polygons and features with landuse
=forest
.
features_gdf[
features_gdf.geom_type.isin(("Polygon", "MultiPolygon"))
& features_gdf.tags.apply(lambda x: "landuse" in x and x["landuse"] == "forest")
].plot(color="green")
<Axes: >