Custom SQL filter¶
QuackOSM enables advanced users to filter data using SQL filters that will be used by DuckDB during processing.
The filter will be loaded alongside with OSM tags filters and features IDs filters.
SQL filter clause will can be passed both in Python API (as custom_sql_filter parameter) and the CLI (as --custom-sql-filter option).
Two columns available to users are: id (type BIGINT) and tags (type: MAP(VARCHAR, VARCHAR)).
You can look for available functions into a DuckDB documentation.
Below are few examples on how to use the custom SQL filters.
import quackosm as qosm
data = qosm.convert_geometry_to_geodataframe(
geometry_filter=qosm.geocode_to_geometry("Greater London"),
osm_extract_source="Geofabrik",
custom_sql_filter="cardinality(tags) = 10",
)
data["tags"].head(10).values
Finished operation in 0:00:17
array([{'bench': 'yes', 'highway': 'bus_stop', 'name': 'Westerham Heights', 'naptan:AtcoCode': '490014442N', 'naptan:Bearing': 'N', 'naptan:CommonName': 'Westerham Heights', 'naptan:Street': 'MAIN ROAD', 'naptan:verified': 'no', 'public_transport': 'platform', 'shelter': 'yes'},
{'amenity': 'post_box', 'check_date': '2021-01-02', 'collection_times': 'Mo-Fr 09:00, Sa 07:00', 'name': 'The Fox', 'operator': 'Royal Mail', 'operator:wikidata': 'Q638098', 'post_box:type': 'pillar', 'ref': 'CR3 228', 'royal_cypher': 'GVIR', 'royal_cypher:wikidata': 'Q33102169'},
{'bench': 'no', 'highway': 'bus_stop', 'name': 'Westerham Hill Baptist Church', 'naptan:AtcoCode': '490014443S', 'naptan:Bearing': 'S', 'naptan:CommonName': 'Westerham Hill Baptist Church', 'naptan:Indicator': 'Stop', 'naptan:verified': 'no', 'public_transport': 'platform', 'shelter': 'no'},
{'bicycle': 'yes', 'designation': 'public_bridleway', 'foot': 'designated', 'highway': 'track', 'horse': 'designated', 'lit': 'no', 'surface': 'compacted', 'tracktype': 'grade2', 'vehicle': 'no', 'width': '2'},
{'bus': 'yes', 'highway': 'bus_stop', 'name': 'Coulsdon Road / Lacey Drive', 'naptan:AtcoCode': '490008895N', 'naptan:Bearing': 'N', 'naptan:CommonName': 'Coulsdon Road Lacey Drive', 'naptan:Street': 'COULSDON ROAD', 'naptan:verified': 'yes', 'public_transport': 'platform', 'ref': '58016'},
{'bicycle': 'yes', 'foot': 'designated', 'highway': 'track', 'horse': 'designated', 'lit': 'no', 'name': 'Old Harrow Lane', 'surface': 'compacted', 'tracktype': 'grade2', 'vehicle': 'yes', 'width': '2'},
{'bus': 'yes', 'highway': 'bus_stop', 'name': 'Lacey Drive', 'naptan:AtcoCode': '490008895W', 'naptan:Bearing': 'W', 'naptan:CommonName': 'Coulsdon Road Lacey Drive', 'naptan:Street': 'LACEY DRIVE', 'naptan:verified': 'no', 'public_transport': 'platform', 'ref': '76343'},
{'bus': 'yes', 'highway': 'bus_stop', 'name': 'Coulsdon Road / Lacey Drive', 'naptan:AtcoCode': '490008895S', 'naptan:Bearing': 'N', 'naptan:CommonName': 'Coulsdon Road Lacey Drive', 'naptan:Street': 'COULSDON ROAD', 'naptan:verified': 'yes', 'public_transport': 'platform', 'ref': '55790'},
{'bus': 'yes', 'highway': 'bus_stop', 'name': 'Lacey Drive', 'naptan:AtcoCode': '490008495E', 'naptan:Bearing': 'N', 'naptan:CommonName': 'Inwood Avenue', 'naptan:Street': 'LACEY DRIVE', 'naptan:verified': 'no', 'public_transport': 'platform', 'ref': '71478'},
{'bus': 'yes', 'highway': 'bus_stop', 'name': 'Homefield Road', 'naptan:AtcoCode': '490014107N', 'naptan:Bearing': 'N', 'naptan:CommonName': 'Waddington Avenue Homefield Rd', 'naptan:Street': 'HOMEFIELD ROAD', 'naptan:verified': 'no', 'public_transport': 'platform', 'ref': '75223'}],
dtype=object)
print("All have exactly 10 tags:", (data["tags"].str.len() == 10).all())
All have exactly 10 tags: True
data = qosm.convert_geometry_to_geodataframe(
geometry_filter=qosm.geocode_to_geometry("Greater London"),
osm_extract_source="Geofabrik",
custom_sql_filter="id % 13 = 0 AND starts_with(id::STRING, '6')",
)
data
Finished operation in 0:00:16
| tags | geometry | |
|---|---|---|
| feature_id | ||
| way/675508340 | {'admin_level': '5', 'boundary': 'administrati... | LINESTRING (0.01498 51.29179, 0.01497 51.29182... |
| way/680964323 | {'highway': 'path'} | LINESTRING (-0.12213 51.29848, -0.12202 51.298... |
| node/6465586933 | {'barrier': 'kissing_gate'} | POINT (-0.09805 51.30019) |
| way/689216450 | {'highway': 'footway', 'surface': 'grass'} | LINESTRING (-0.09777 51.3001, -0.09745 51.3003... |
| way/680964310 | {'leisure': 'park'} | POLYGON ((-0.11948 51.30027, -0.11914 51.30029... |
| ... | ... | ... |
| way/676450996 | {'highway': 'service', 'service': 'parking_ais... | LINESTRING (-0.02865 51.68063, -0.02866 51.680... |
| node/6396478673 | {'highway': 'street_lamp', 'lamp_model': 'Oran... | POINT (-0.05036 51.68202) |
| way/68447262 | {'cycleway:both': 'no', 'highway': 'tertiary',... | LINESTRING (-0.11891 51.67939, -0.11873 51.679... |
| node/6285656689 | {'barrier': 'lift_gate'} | POINT (-0.1059 51.68389) |
| way/671215259 | {'highway': 'service'} | LINESTRING (-0.10592 51.68382, -0.1059 51.6838... |
13503 rows × 2 columns
print("All starting with digit 6:", data.index.map(lambda x: x.split("/")[1].startswith("6")).all())
print("All divisible by 13:", data.index.map(lambda x: (int(x.split("/")[1]) % 13) == 0).all())
All starting with digit 6: True All divisible by 13: True
Find features that have all selected tags present¶
When using osm_tags_filter with value { "building": True, "historic": True, "name": True }, the result will contain every feature that have at least one of those tags.
Positive tags filters are combined using an OR operator. You can read more about it here.
To get filters with AND operator, the custom_sql_filter parameter has to be used.
To match a list of keys against given values we have to use list-related functions.
More LIST functions are available here.
data = qosm.convert_geometry_to_geodataframe(
geometry_filter=qosm.geocode_to_geometry("Greater London"),
osm_extract_source="Geofabrik",
custom_sql_filter="list_has_all(map_keys(tags), ['building', 'historic', 'name'])",
)
data
Finished operation in 0:00:18
| tags | geometry | |
|---|---|---|
| feature_id | ||
| way/233651700 | {'building': 'yes', 'historic': 'building', 'n... | POLYGON ((-0.10039 51.303, -0.10047 51.30304, ... |
| way/1297142056 | {'building': 'yes', 'historic': 'building', 'n... | POLYGON ((-0.10281 51.30545, -0.10278 51.30551... |
| way/1297142047 | {'building': 'yes', 'historic': 'building', 'n... | POLYGON ((-0.09056 51.3059, -0.09056 51.30596,... |
| way/1297142057 | {'building': 'yes', 'historic': 'building', 'n... | POLYGON ((-0.10043 51.30646, -0.10051 51.30642... |
| way/1297142046 | {'building': 'yes', 'historic': 'building', 'n... | POLYGON ((-0.09006 51.30725, -0.09004 51.30731... |
| ... | ... | ... |
| way/496917465 | {'addr:city': 'London', 'addr:housename': 'Clo... | POLYGON ((-0.11332 51.64513, -0.11286 51.64529... |
| way/556704881 | {'building': 'yes', 'historic': 'folly', 'name... | POLYGON ((-0.06086 51.67419, -0.06085 51.6742,... |
| way/981954890 | {'building': 'yes', 'heritage:website': 'https... | POLYGON ((-0.10592 51.68797, -0.10587 51.68823... |
| way/981954889 | {'building': 'yes', 'heritage:website': 'https... | POLYGON ((-0.10622 51.68818, -0.10615 51.68851... |
| way/142289524 | {'building': 'yes', 'heritage:website': 'https... | POLYGON ((-0.11151 51.68876, -0.11181 51.68879... |
368 rows × 2 columns
tags_names = ["name", "building", "historic"]
for tag_name in tags_names:
data[tag_name] = data["tags"].apply(lambda x, tag_name=tag_name: x.get(tag_name))
data[[*tags_names, "geometry"]].explore(tiles="CartoDB DarkMatter", color="orange")
data = qosm.convert_geometry_to_geodataframe(
geometry_filter=qosm.geocode_to_geometry("Greater London"),
osm_extract_source="Geofabrik",
custom_sql_filter=r"""
list_has_all(map_keys(tags), ['highway', 'name'])
AND regexp_matches(tags['name'], '^(New|Old)\s\w+')
""",
)
data
Finished operation in 0:00:18
| tags | geometry | |
|---|---|---|
| feature_id | ||
| way/41979642 | {'highway': 'residential', 'maxspeed': '20 mph... | LINESTRING (-0.11013 51.29563, -0.10916 51.294... |
| way/158943362 | {'bicycle': 'yes', 'foot': 'designated', 'high... | LINESTRING (0.06978 51.30034, 0.06981 51.3003,... |
| way/959551596 | {'bicycle': 'yes', 'foot': 'designated', 'high... | LINESTRING (0.06607 51.30199, 0.06588 51.30183... |
| way/764385798 | {'bicycle': 'yes', 'highway': 'track', 'horse'... | LINESTRING (0.06487 51.30348, 0.06522 51.30323... |
| way/164859209 | {'bicycle': 'yes', 'designation': 'public_brid... | LINESTRING (0.06457 51.30363, 0.06487 51.30348) |
| ... | ... | ... |
| way/146466425 | {'access': 'permissive', 'bicycle': 'designate... | LINESTRING (-0.05884 51.67082, -0.05854 51.670... |
| way/1122120329 | {'highway': 'footway', 'name': 'New River Path... | LINESTRING (-0.05836 51.67067, -0.05833 51.670... |
| way/1122120327 | {'highway': 'footway', 'name': 'New River Path... | LINESTRING (-0.05815 51.67101, -0.05805 51.67115) |
| node/469777029 | {'highway': 'bus_stop', 'name': 'New Cottage F... | POINT (-0.15949 51.68403) |
| node/469777028 | {'bench': 'yes', 'highway': 'bus_stop', 'name'... | POINT (-0.16013 51.68457) |
1938 rows × 2 columns
ways_only = data[data.index.str.startswith("way/")]
ways_only["name"] = ways_only["tags"].apply(lambda x: x["name"])
ways_only["prefix"] = ways_only["name"].apply(lambda x: x.split()[0])
ways_only[["name", "prefix", "geometry"]].explore(
tiles="CartoDB DarkMatter", column="prefix", cmap=["orange", "royalblue"]
)