Load and save
In [1]:
Copied!
from pytorch_lightning import seed_everything
from srai.embedders import Hex2VecEmbedder
from srai.joiners import IntersectionJoiner
from srai.loaders import OSMOnlineLoader
from srai.neighbourhoods import H3Neighbourhood
from srai.plotting import plot_regions
from srai.regionalizers import H3Regionalizer, geocode_to_region_gdf
from pytorch_lightning import seed_everything
from srai.embedders import Hex2VecEmbedder
from srai.joiners import IntersectionJoiner
from srai.loaders import OSMOnlineLoader
from srai.neighbourhoods import H3Neighbourhood
from srai.plotting import plot_regions
from srai.regionalizers import H3Regionalizer, geocode_to_region_gdf
In [2]:
Copied!
# Fix the global random seed up front so the training runs below are repeatable.
SEED = 71
seed_everything(SEED)
Seed set to 71
Out[2]:
71
Load data from OSM¶
First, use geocoding to get the area.
In [3]:
Copied!
# Geocode the place name into a region GeoDataFrame and preview it on a map.
area_gdf = geocode_to_region_gdf("Wrocław, Poland")
plot_regions(area_gdf, tiles_style="CartoDB positron")
Out[3]:
Make this Notebook Trusted to load map: File -> Trust Notebook
Next, download the data for the selected region and the specified tags. We're using `OSMOnlineLoader` here, as it's faster for low numbers of tags. In a real-life scenario with more tags, you would likely want to use the `OSMPbfLoader`.
In [4]:
Copied!
# OSM tag filter: each key maps to one accepted value or a list of accepted values.
tags = {
    "leisure": "park",
    "landuse": "forest",
    "amenity": ["bar", "restaurant", "cafe"],
    "water": "river",
    "sport": "soccer",
}

# Download the features matching the tags for the geocoded area.
loader = OSMOnlineLoader()
features_gdf = loader.load(area_gdf, tags)

# Base map of the area drawn with a fully transparent colour, so that only the
# downloaded features overlaid on top of it stand out.
folium_map = plot_regions(
    area_gdf,
    colormap=["rgba(0,0,0,0)"],
    tiles_style="CartoDB positron",
)
features_gdf.explore(m=folium_map)
0%| | 0/7 [00:00<?, ?it/s]
Downloading leisure: park : 0%| | 0/7 [00:00<?, ?it/s]
Downloading leisure: park : 14%|████████████████████▋ | 1/7 [00:00<00:01, 3.28it/s]
Downloading landuse: forest : 14%|████████████████████▋ | 1/7 [00:00<00:01, 3.28it/s]
Downloading landuse: forest : 29%|█████████████████████████████████████████▍ | 2/7 [00:00<00:01, 2.87it/s]
Downloading amenity: bar : 29%|█████████████████████████████████████████▍ | 2/7 [00:00<00:01, 2.87it/s]
Downloading amenity: bar : 43%|██████████████████████████████████████████████████████████████▏ | 3/7 [00:00<00:01, 3.59it/s]
Downloading amenity: restaurant: 43%|██████████████████████████████████████████████████████████████▏ | 3/7 [00:00<00:01, 3.59it/s]
Downloading amenity: restaurant: 57%|██████████████████████████████████████████████████████████████████████████████████▊ | 4/7 [00:01<00:00, 3.83it/s]
Downloading amenity: cafe : 57%|██████████████████████████████████████████████████████████████████████████████████▊ | 4/7 [00:01<00:00, 3.83it/s]
Downloading amenity: cafe : 71%|███████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 5/7 [00:01<00:00, 4.09it/s]
Downloading water: river : 71%|███████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 5/7 [00:01<00:00, 4.09it/s]
Downloading water: river : 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6/7 [00:01<00:00, 3.92it/s]
Downloading sport: soccer : 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎ | 6/7 [00:01<00:00, 3.92it/s]
Downloading sport: soccer : 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:01<00:00, 4.11it/s]
Downloading sport: soccer : 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:01<00:00, 3.84it/s]
Out[4]:
Make this Notebook Trusted to load map: File -> Trust Notebook
Prepare the data for embedding¶
After downloading the data, we need to prepare it for embedding. Namely, we need to regionalize the selected area and join the features with the regions.
In [5]:
Copied!
# Split the area into H3 cells at resolution 9 and preview the resulting grid.
regionalizer = H3Regionalizer(resolution=9)
regions_gdf = regionalizer.transform(area_gdf)
plot_regions(regions_gdf, tiles_style="CartoDB positron")
Out[5]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [6]:
Copied!
# Join regions with features using the intersection joiner. As the output below
# shows, the result carries no columns, only a (region_id, feature_id) index.
joiner = IntersectionJoiner()
joint_gdf = joiner.transform(regions_gdf, features_gdf)
joint_gdf
Out[6]:
region_id | feature_id |
---|---|
891e204182fffff | way/585624450 |
891e205186fffff | relation/2950476 |
relation/12226375 | |
way/899316003 | |
way/1045531124 | |
... | ... |
891e2040cc7ffff | way/308849508 |
way/308849497 | |
891e2040847ffff | way/309536015 |
node/6276681768 | |
891e2040eafffff | node/5470047133 |
4020 rows × 0 columns
Embedding¶
After preparing the data, we can proceed with generating embeddings for the regions.
In [7]:
Copied!
import warnings

neighbourhood = H3Neighbourhood(regions_gdf)
embedder = Hex2VecEmbedder([15, 10])

# Short CPU-only training run (5 epochs) to keep the example quick.
trainer_kwargs = {"max_epochs": 5, "accelerator": "cpu"}

# Silence warnings only for the duration of the training call.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    embeddings = embedder.fit_transform(
        regions_gdf,
        features_gdf,
        joint_gdf,
        neighbourhood,
        trainer_kwargs=trainer_kwargs,
        batch_size=100,
    )

embeddings
0%| | 0/3168 [00:00<?, ?it/s]
83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍ | 2614/3168 [00:00<00:00, 26124.45it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3168/3168 [00:00<00:00, 25748.33it/s]
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
| Name | Type | Params --------------------------------------- 0 | encoder | Sequential | 280 --------------------------------------- 280 Trainable params 0 Non-trainable params 280 Total params 0.001 Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=5` reached.
Out[7]:
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
---|---|---|---|---|---|---|---|---|---|---|
region_id | ||||||||||
891e2041863ffff | 0.359450 | -0.208136 | -0.002650 | 0.325931 | 0.215098 | -0.140461 | -0.239743 | -0.121372 | -0.195292 | 0.341896 |
891e2040137ffff | 0.359450 | -0.208136 | -0.002650 | 0.325931 | 0.215098 | -0.140461 | -0.239743 | -0.121372 | -0.195292 | 0.341896 |
891e2047463ffff | 0.359450 | -0.208136 | -0.002650 | 0.325931 | 0.215098 | -0.140461 | -0.239743 | -0.121372 | -0.195292 | 0.341896 |
891e204182fffff | 0.052792 | 0.235472 | 0.462689 | 0.512266 | -0.195567 | -0.012775 | 0.395774 | -0.384147 | 0.421536 | 0.004538 |
891e205186fffff | -0.773747 | 0.629639 | -0.051571 | -0.320957 | -0.335133 | 0.267245 | 0.180342 | 0.113288 | 0.327416 | -0.326109 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
891e204e1abffff | 0.359450 | -0.208136 | -0.002650 | 0.325931 | 0.215098 | -0.140461 | -0.239743 | -0.121372 | -0.195292 | 0.341896 |
891e2040847ffff | 0.036413 | 0.297599 | 0.493102 | 0.569729 | -0.166154 | -0.119122 | 0.580659 | -0.393856 | 0.542635 | -0.037937 |
891e2040eafffff | 0.443952 | -0.295046 | 0.044628 | 0.197060 | 0.115577 | -0.170690 | -0.003142 | -0.232593 | -0.140660 | 0.107267 |
891e2043347ffff | 0.359450 | -0.208136 | -0.002650 | 0.325931 | 0.215098 | -0.140461 | -0.239743 | -0.121372 | -0.195292 | 0.341896 |
891e204503bffff | 0.359450 | -0.208136 | -0.002650 | 0.325931 | 0.215098 | -0.140461 | -0.239743 | -0.121372 | -0.195292 | 0.341896 |
3168 rows × 10 columns
Saving and loading the fitted embedder¶
In [8]:
Copied!
# Persist the fitted Hex2Vec embedder under the path ./modello.
embedder.save("./modello")
In [9]:
Copied!
# Restore the embedder from the saved path; the repr below confirms its type.
embedder_loaded = Hex2VecEmbedder.load("./modello")
embedder_loaded
Out[9]:
<srai.embedders.hex2vec.embedder.Hex2VecEmbedder at 0x7f1236b34e50>
In [10]:
Copied!
from srai.embedders import Highway2VecEmbedder
from srai.loaders import OSMNetworkType, OSMWayLoader

# Load the DRIVE-type road network for the same area.
# NOTE(review): later cells read `d[1]`, so `d` is presumably a
# (nodes, edges) pair of GeoDataFrames - confirm against the loader docs.
way_loader = OSMWayLoader(OSMNetworkType.DRIVE)
d = way_loader.load(area_gdf)
/root/development/srai/srai/loaders/osm_way_loader/osm_way_loader.py:229: FutureWarning: The clean_periphery argument has been deprecated and will be removed in the v2.0.0 release. Future behavior will be as though clean_periphery=True. See the OSMnx v2 migration guide: https://github.com/gboeing/osmnx/issues/1123 G_directed = ox.graph_from_polygon(
/root/development/srai/.venv/lib/python3.10/site-packages/osmnx/_overpass.py:350: FutureWarning: `settings.timeout` is deprecated and will be removed in the v2.0.0 release: use `settings.requests_timeout` instead. See the OSMnx v2 migration guide: https://github.com/gboeing/osmnx/issues/1123 overpass_settings = _make_overpass_settings()
/root/development/srai/.venv/lib/python3.10/site-packages/osmnx/_overpass.py:360: FutureWarning: `settings.timeout` is deprecated and will be removed in the v2.0.0 release: use `settings.requests_timeout` instead. See the OSMnx v2 migration guide: https://github.com/gboeing/osmnx/issues/1123 yield _overpass_request(data={"data": query_str})
/root/development/srai/srai/loaders/osm_way_loader/osm_way_loader.py:237: FutureWarning: The `get_undirected` function is deprecated and will be removed in the v2.0.0 release. Replace it with `convert.to_undirected` instead. See the OSMnx v2 migration guide: https://github.com/gboeing/osmnx/issues/1123 G_undirected = ox.utils_graph.get_undirected(G_directed)
In [11]:
Copied!
# The second element of the loader result is used as the features frame
# (presumably the road edges - TODO confirm).
edges_gdf = d[1]
joint = joiner.transform(regions_gdf, edges_gdf)
In [12]:
Copied!
# Fit Highway2Vec on the second element of the way-loader result, the same
# frame that was used to build `joint`.
edges_gdf = d[1]
highway2vec = Highway2VecEmbedder()
highway2vec.fit(regions_gdf, edges_gdf, joint)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
| Name | Type | Params --------------------------------------- 0 | encoder | Sequential | 16.0 K 1 | decoder | Sequential | 16.2 K --------------------------------------- 32.1 K Trainable params 0 Non-trainable params 32.1 K Total params 0.128 Total estimated model params size (MB)
/root/development/srai/.venv/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.
`Trainer.fit` stopped: `max_epochs=10` reached.
In [13]:
Copied!
# Persist the fitted Highway2Vec embedder under the path "highway2vec".
highway2vec.save("highway2vec")
In [14]:
Copied!
# Inspect the fitted embedder's instance attributes (model, sizes, fitted flag).
vars(highway2vec)
Out[14]:
{'_model': Highway2VecModel( (encoder): Sequential( (0): Linear(in_features=218, out_features=64, bias=True) (1): ReLU() (2): Linear(in_features=64, out_features=30, bias=True) ) (decoder): Sequential( (0): Linear(in_features=30, out_features=64, bias=True) (1): ReLU() (2): Linear(in_features=64, out_features=218, bias=True) ) ), '_hidden_size': 64, '_embedding_size': 30, '_is_fitted': True}
In [15]:
Copied!
# Restore the Highway2Vec embedder from disk; the repr below confirms its type.
Highway2VecEmbedder.load("highway2vec")
Out[15]:
<srai.embedders.highway2vec.embedder.Highway2VecEmbedder at 0x7f1236b359c0>
In [16]:
Copied!
import geopandas as gpd
import pandas as pd
from shapely.geometry import Polygon
from srai.constants import REGIONS_INDEX
from srai.embedders import GTFS2VecEmbedder

# NOTE: from here on `features_gdf`, `regions_gdf`, `joint_gdf` and `embedder`
# are rebound to a small hand-made GTFS example and no longer refer to the
# OSM / Hex2Vec objects created earlier in the notebook.

# Three stops (point geometries) with trip_count_at_* and directions_at_* columns.
stop_ids = pd.Index(name="stop_id", data=[1, 2, 3])
features_gdf = gpd.GeoDataFrame(
    {
        "trip_count_at_6": [1, 0, 0],
        "trip_count_at_7": [1, 1, 0],
        "trip_count_at_8": [0, 0, 1],
        "directions_at_6": [
            {"A", "A1"},
            {"B", "B1"},
            {"C"},
        ],
    },
    geometry=gpd.points_from_xy([1, 2, 5], [1, 2, 2]),
    index=stop_ids,
)

# Three 3x3 square regions placed side by side along the x axis.
region_ids = pd.Index(name=REGIONS_INDEX, data=["ff1", "ff2", "ff3"])
regions_gdf = gpd.GeoDataFrame(
    geometry=[
        Polygon([(0, 0), (0, 3), (3, 3), (3, 0)]),
        Polygon([(4, 0), (4, 3), (7, 3), (7, 0)]),
        Polygon([(8, 0), (8, 3), (11, 3), (11, 0)]),
    ],
    index=region_ids,
)

# Region-to-stop pairs: stops 1 and 2 belong to ff1, stop 3 to ff2; ff3 has no stops.
joint_gdf = gpd.GeoDataFrame()
joint_gdf.index = pd.MultiIndex.from_tuples(
    [("ff1", 1), ("ff1", 2), ("ff2", 3)],
    names=[REGIONS_INDEX, "stop_id"],
)

# Fit a tiny GTFS2Vec model and embed the same regions it was fitted on.
embedder = GTFS2VecEmbedder(hidden_size=2, embedding_size=4)
embedder.fit(regions_gdf, features_gdf, joint_gdf)
res = embedder.transform(regions_gdf, features_gdf, joint_gdf)
res
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
| Name | Type | Params --------------------------------------- 0 | encoder | Sequential | 16 1 | decoder | Sequential | 13 --------------------------------------- 29 Trainable params 0 Non-trainable params 29 Total params 0.000 Total estimated model params size (MB)
/root/development/srai/.venv/lib/python3.10/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
`Trainer.fit` stopped: `max_epochs=10` reached.
Out[16]:
0 | 1 | 2 | 3 | |
---|---|---|---|---|
region_id | ||||
ff1 | 0.687915 | 0.153115 | -0.648070 | -0.417944 |
ff2 | 0.849668 | 0.173882 | -0.952428 | -0.055791 |
ff3 | 0.913412 | 0.186434 | -1.057311 | 0.080431 |
In [17]:
Copied!
# Persist the fitted GTFS2Vec embedder under the path "gtfs2vec".
embedder.save("gtfs2vec")
In [18]:
Copied!
a = embedder.load("gtfs2vec")
a = embedder.load("gtfs2vec")
In [19]:
Copied!
a = embedder.transform(regions_gdf, features_gdf, joint_gdf)
a = embedder.transform(regions_gdf, features_gdf, joint_gdf)
In [20]:
Copied!
# Show the embeddings computed in the previous cell; compare them with `res`
# from the GTFS2Vec fit cell.
a
Out[20]:
0 | 1 | 2 | 3 | |
---|---|---|---|---|
region_id | ||||
ff1 | 0.687915 | 0.153115 | -0.648070 | -0.417944 |
ff2 | 0.849668 | 0.173882 | -0.952428 | -0.055791 |
ff3 | 0.913412 | 0.186434 | -1.057311 | 0.080431 |