Overture Maps Loader¶
OvertureMapsLoader
can download Overture Maps data from the S3 bucket for a given region.
It is a wrapper around the OvertureMaestro
library, which can download the data in its original format and also provides some more advanced functions.
In the SRAI
context, OvertureMapsLoader
uses a so-called wide format for returning features, with columns representing the potential categories of an object. If you want to read more in-depth about this format, you can check out this OvertureMaestro docs page.
from shapely.geometry import box
from srai.constants import GEOMETRY_COLUMN
from srai.loaders import OvertureMapsLoader
from srai.regionalizers import geocode_to_region_gdf
Using OvertureMapsLoader to download data for a specific area¶
Download all available features in Paris, France¶
# Create a loader with the default settings.
loader = OvertureMapsLoader()
# Geocode the name "Paris" into a region GeoDataFrame.
paris = geocode_to_region_gdf("Paris")
# Download the Overture Maps features for that region.
paris_features_gdf = loader.load(paris)
# Bare expression: displays the result when run as a notebook cell.
paris_features_gdf
Finished operation in 0:00:26
geometry | base|infrastructure|aerialway | base|infrastructure|airport | base|infrastructure|barrier | base|infrastructure|bridge | base|infrastructure|communication | base|infrastructure|emergency | base|infrastructure|manhole | base|infrastructure|pedestrian | base|infrastructure|pier | ... | places|place|professional_services | places|place|public_service_and_government | places|place|real_estate | places|place|religious_organization | places|place|retail | places|place|structure_and_geography | places|place|travel | transportation|segment|rail | transportation|segment|road | transportation|segment|water | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
feature_id | |||||||||||||||||||||
08b1fb46666c3fff0001a862ac62e27e | POINT (2.34731 48.86724) | False | False | True | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b1fb46666cefff0001ad93abc79405 | POINT (2.34739 48.86729) | False | False | True | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b1fb46666c1fff0001a39bc9a68810 | POINT (2.3473 48.8673) | False | False | True | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b1fb46666c1fff0001aa11990adc4d | POINT (2.34741 48.86733) | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b1fb46666c1fff0001ad131e8b11cc | POINT (2.34717 48.86734) | False | False | True | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
08b1fb4759a49fff020097d3e602ce58 | POLYGON ((2.33595 48.90143, 2.33573 48.90155, ... | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b1fb4759a49fff02007817c242243d | POLYGON ((2.3362 48.9014, 2.33597 48.90153, 2.... | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b1fb4664d11fff0200a4fea2da5f4b | POLYGON ((2.3375 48.90124, 2.33756 48.9012, 2.... | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b1fb4664d1cfff020073b742aa7b3b | POLYGON ((2.33818 48.90131, 2.3381 48.90135, 2... | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b1fb4664d03fff02009c2aabdb5591 | POLYGON ((2.33841 48.90134, 2.3384 48.90134, 2... | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
570790 rows × 115 columns
Plot features¶
Colours from this palette: https://colorhunt.co/palette/f8ededff8225b43f3f173b45
# Base layer: the region polygon, which also creates the axes everything else is drawn on.
ax = paris.plot(color="#F8EDED", figsize=(16, 16))

all_columns = paris_features_gdf.columns

# Water: features flagged in any column whose name contains "water".
is_water = paris_features_gdf.loc[:, all_columns.str.contains("water", regex=False)].any(axis=1)
water_data = paris_features_gdf[is_water]
water_data.plot(ax=ax, color="#FF8225", markersize=0)

# Roads: features flagged in the single road-segment column.
is_road = paris_features_gdf["transportation|segment|road"]
roads_data = paris_features_gdf[is_road]
roads_data.plot(ax=ax, color="#B43F3F", markersize=0, linewidth=0.25)

# Buildings: features flagged in any column whose name starts with "buildings".
is_building = paris_features_gdf.loc[:, all_columns.str.startswith("buildings")].any(axis=1)
buildings_data = paris_features_gdf[is_building]
buildings_data.plot(ax=ax, color="#173B45", markersize=0)

# Region outline is drawn last so it sits on top of the feature layers.
paris.boundary.plot(ax=ax, color="#173B45", linewidth=2, alpha=0.5)

# Clip the view to the region's bounding box plus a small margin, and hide the axes.
margin = 0.001
west, south, east, north = paris.total_bounds
ax.set_xlim(west - margin, east + margin)
ax.set_ylim(south - margin, north + margin)
ax.set_axis_off()
Download more detailed data with higher hierarchy value¶
By default, the hierarchy_depth
value is equal to 1
, but it can be set to None
to get a full list of all possible columns.
# Bounding box used as the download area; shapely's box takes (minx, miny, maxx, maxy).
manhattan_bbox = box(-73.994551, 40.762396, -73.936872, 40.804239)
# hierarchy_depth=None requests the full list of all possible columns.
loader = OvertureMapsLoader(hierarchy_depth=None)
# Download the features that fall within the bounding box.
new_york_features_gdf = loader.load(manhattan_bbox)
# Bare expression: displays the result when run as a notebook cell.
new_york_features_gdf
Finished operation in 0:00:31
geometry | base|infrastructure|aerialway|aerialway_station | base|infrastructure|aerialway|cable_car | base|infrastructure|aerialway|chair_lift | base|infrastructure|aerialway|drag_lift | base|infrastructure|aerialway|gondola | base|infrastructure|aerialway|goods | base|infrastructure|aerialway|j-bar | base|infrastructure|aerialway|magic_carpet | base|infrastructure|aerialway|mixed_lift | ... | transportation|segment|road|service|parking_aisle | transportation|segment|road|steps | transportation|segment|road|tertiary | transportation|segment|road|tertiary|link | transportation|segment|road|track | transportation|segment|road|trunk | transportation|segment|road|trunk|link | transportation|segment|road|unclassified | transportation|segment|road|unknown | transportation|segment|water | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
feature_id | |||||||||||||||||||||
08b2a100d2ce9fff0001bbf87e990781 | LINESTRING (-73.97214 40.7268, -73.97341 40.72... | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b2a10725843fff0001aed186cf90e1 | POINT (-73.99301 40.7627) | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b2a10725850fff0001a8078eb60797 | POINT (-73.99422 40.76333) | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b2a10725850fff0001abb9a995714a | POINT (-73.99409 40.76328) | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b2a10725842fff0001abd29d53f39b | POINT (-73.9933 40.76271) | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
08b2a100f2650fff02008c2684f54385 | POLYGON ((-73.93664 40.77279, -73.93679 40.772... | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b2a100f2656fff0200f667ed909313 | POLYGON ((-73.93718 40.77323, -73.93719 40.773... | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b2a100f260bfff0200ec356893da2f | POLYGON ((-73.93737 40.77379, -73.93741 40.773... | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b2a100f2604fff0200b0ba2bc875ff | POLYGON ((-73.93743 40.7752, -73.93661 40.7749... | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
08b2a100f2622fff0200df47d5bd8207 | POLYGON ((-73.93699 40.77577, -73.93694 40.775... | False | False | False | False | False | False | False | False | False | ... | False | False | False | False | False | False | False | False | False | False |
64539 rows × 2633 columns
As you can see, there are over 2600
columns available.
Let's see the 20 most popular columns.
# Summing each boolean category column gives the number of features flagged with it.
category_counts = new_york_features_gdf.drop(columns=GEOMETRY_COLUMN).sum()
# Show the 20 categories with the highest counts.
category_counts.sort_values(ascending=False).head(20)
buildings|building 12487 base|land|tree|tree 5670 base|infrastructure|transportation|crossing 3848 transportation|segment|road|footway 3379 base|infrastructure|barrier|kerb 3025 places|place|health_and_medical 2947 places|place|health_and_medical|doctor 2278 transportation|segment|road|footway|sidewalk 2145 places|place|eat_and_drink|restaurant 1742 buildings|building|residential|apartments 1706 transportation|segment|road|footway|crosswalk 1645 places|place|professional_services 1355 base|infrastructure|transit|bicycle_parking 994 base|infrastructure|transportation|traffic_signals 990 places|place|retail 867 places|place|beauty_and_spa 853 transportation|segment|road|residential 832 places|place|health_and_medical|hospital 830 places|place|beauty_and_spa|beauty_salon 791 places|place|attractions_and_activities|landmark_and_historical_building 749 dtype: int64
Configure places dataset¶
The places schema is the only one that is treated differently from the other data types.
By default, places use both primary
and alternate
categories to define a feature.
Additionally, there is a filter applied to get only features with confidence score >= 0.75
.
There are two dedicated parameters: places_minimal_confidence
and places_use_primary_category_only
to configure how the data should be transformed.
Let's do an example with both of these parameters. We will also use the theme_type_pairs
parameter to limit the scope of the downloaded data.
# Both loaders are limited to the ("places", "place") theme/type pair and use
# only the primary category of each place.
# This loader leaves the minimal confidence at its default value.
default_confidence_loader = OvertureMapsLoader(
theme_type_pairs=[("places", "place")], places_use_primary_category_only=True
)
# This loader only keeps places with a confidence score of at least 0.99.
strict_confidence_loader = OvertureMapsLoader(
theme_type_pairs=[("places", "place")],
places_minimal_confidence=0.99,
places_use_primary_category_only=True,
)
# Geocode the district name into a region and load it with both loaders.
songpa = geocode_to_region_gdf("Songpa-gu, Seoul")
songpa_default_confidence_features_gdf = default_confidence_loader.load(songpa)
songpa_strict_confidence_features_gdf = strict_confidence_loader.load(songpa)
# NOTE: len() is the number of features returned, so the values printed below
# are feature counts for each confidence setting, not confidence scores.
print(f"Default confidence score: {len(songpa_default_confidence_features_gdf)}")
print(f"Strict confidence score: {len(songpa_strict_confidence_features_gdf)}")
Finished operation in 0:00:03
Finished operation in 0:00:03
Default confidence score: 5224 Strict confidence score: 3
Let's see the count of categories in the places dataset with confidence score >= 0.99
.
# Keep only the boolean category columns by dropping the geometry.
songpa_strict_confidence_features_df = songpa_strict_confidence_features_gdf.drop(
    columns=GEOMETRY_COLUMN
)
# Count the features per category, then show only categories that actually occur,
# most frequent first.
strict_category_counts = songpa_strict_confidence_features_df.sum()
strict_category_counts[strict_category_counts > 0].sort_values(ascending=False)
places|place|retail 3 dtype: int64
Plot features¶
Now we will see the difference between the default list of places (gray dots) and the strict ones (coloured circles).
# Places returned by the default loader but not by the strict one.
default_only_ids = songpa_default_confidence_features_gdf.index.difference(
    songpa_strict_confidence_features_gdf.index
)
default_only_places = songpa_default_confidence_features_gdf.loc[default_only_ids]

# Base map: the default-only places as faint gray dots.
m = default_only_places.geometry.explore(
    tiles="CartoDB Voyager",
    color="gray",
    tooltip=False,
    style_kwds={"opacity": 0.25, "stroke": False},
)
songpa.boundary.explore(m=m, color="black")

# The dot product of the boolean columns with the column names concatenates,
# for each feature, the names of the columns that are True.
category_labels = songpa_strict_confidence_features_df.dot(
    songpa_strict_confidence_features_df.columns
)
songpa_gdf_with_categories = songpa_strict_confidence_features_gdf.join(
    category_labels.rename("category")
)

# Larger black markers are added first, underneath the coloured ones.
songpa_gdf_with_categories.geometry.explore(
    m=m,
    tooltip=False,
    marker_kwds={"radius": 6},
    style_kwds={"color": "black", "fillOpacity": 1},
)
# Smaller markers coloured by category are added on top; as the last expression,
# the returned map is displayed when run as a notebook cell.
songpa_gdf_with_categories[[GEOMETRY_COLUMN, "category"]].explore(
    m=m,
    column="category",
    tooltip=["feature_id", "category"],
    cmap="tab20",
    marker_kwds={"radius": 4},
    style_kwds={"fillOpacity": 1},
)