Porto taxi
Porto Taxi Dataset
The dataset covers one year of taxi trajectory data from 442 vehicles operating in Porto, Portugal, between July 1, 2013, and June 30, 2014. Each completed trip is categorized as either (A) taxi central–based, (B) stand–based, or (C) non–taxi central–based, reflecting whether the ride was initiated through the dispatch central, a taxi stand, or a random street pickup. Each record includes metadata such as trip origin type, taxi and call identifiers, day type, and a GPS trajectory encoded as a sequence of geographic coordinates.
import folium
import h3
from IPython.display import display
from srai.datasets import PortoTaxiDataset
porto_taxi = PortoTaxiDataset()
type(porto_taxi.train_gdf), type(porto_taxi.test_gdf)
(NoneType, NoneType)
Get data using .load() method -> Default config (Travel time estimation)
ds = porto_taxi.load()
ds.keys()
dict_keys(['train', 'test'])
type(porto_taxi.train_gdf), type(porto_taxi.test_gdf)
(geopandas.geodataframe.GeoDataFrame, geopandas.geodataframe.GeoDataFrame)
ds["train"].head()
trip_id | h3_sequence | avg_speed_per_hex | timestamp | day_type | call_type | taxi_id | geometry | duration | |
---|---|---|---|---|---|---|---|---|---|
0 | 1372636853620000380 | [8939220f383ffff, 8939220f393ffff, 8939220f067... | [1.927607771705634, 1.927607771705634, 1.92760... | [2013-07-01T00:00:53.000000, 2013-07-01T00:00:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A] | [C, C, C, C, C, C, C, C, C, C, C, C, C, C] | [20000380, 20000380, 20000380, 20000380, 20000... | LINESTRING (-8.61029 41.14075, -8.61031 41.140... | 360.0 |
1 | 1372636858620000589 | [8939220f063ffff, 8939220f073ffff, 8939220f00b... | [3.0560328387467504, 3.0560328387467504, 49.10... | [2013-07-01T00:00:58.000000, 2013-07-01T00:00:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A] | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C] | [20000589, 20000589, 20000589, 20000589, 20000... | LINESTRING (-8.61864 41.14141, -8.6185 41.1413... | 330.0 |
2 | 1372636875620000233 | [8939220f00fffff, 8939220f007ffff, 8939220f017... | [9.213600054030238, 9.213600054030238, 23.0525... | [2013-07-01T00:01:15.000000, 2013-07-01T00:01:... | [A, A, A, A, A, A, A, A, A, A, A] | [C, C, C, C, C, C, C, C, C, C, C] | [20000233, 20000233, 20000233, 20000233, 20000... | LINESTRING (-8.61989 41.14801, -8.62016 41.147... | 315.0 |
3 | 1372636956620000167 | [8939220f0afffff, 8939220f1dbffff, 8939220f0af... | [7.325408241610683, 7.325408241610683, 23.4882... | [2013-07-01T00:02:36.000000, 2013-07-01T00:02:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | [20000167, 20000167, 20000167, 20000167, 20000... | LINESTRING (-8.6133 41.15439, -8.61326 41.1538... | 465.0 |
4 | 1372636965620000231 | [8939220f067ffff, 8939220f39bffff, 8939220f383... | [24.42802472301524, 24.42802472301524, 24.4280... | [2013-07-01T00:02:45.000000, 2013-07-01T00:02:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | [20000231, 20000231, 20000231, 20000231, 20000... | LINESTRING (-8.6155 41.14067, -8.61335 41.1415... | 375.0 |
ds["test"].head()
trip_id | h3_sequence | avg_speed_per_hex | timestamp | day_type | call_type | taxi_id | geometry | duration | |
---|---|---|---|---|---|---|---|---|---|
0 | 1372638160620000456 | [8939220f39bffff, 8939220f067ffff, 8939220f02b... | [24.38591002388346, 24.38591002388346, 24.3859... | [2013-07-01T00:22:40.000000, 2013-07-01T00:22:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | [20000456, 20000456, 20000456, 20000456, 20000... | LINESTRING (-8.61215 41.14057, -8.61455 41.140... | 435.0 |
1 | 1372638504620000600 | [8939220c493ffff, 8939220e22fffff, 8939220e223... | [10.331445317923839, 10.331445317923839, 35.99... | [2013-07-01T00:28:24.000000, 2013-07-01T00:28:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [20000600, 20000600, 20000600, 20000600, 20000... | LINESTRING (-8.57362 41.17077, -8.57406 41.170... | 945.0 |
2 | 1372639848620000424 | [8939220f477ffff, 8939220f463ffff, 8939220f46b... | [3.3926150007803417, 3.3926150007803417, 13.70... | [2013-07-01T00:50:48.000000, 2013-07-01T00:50:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | [20000424, 20000424, 20000424, 20000424, 20000... | LINESTRING (-8.63984 41.1598, -8.64034 41.1598... | 600.0 |
3 | 1372639875620000009 | [8939220e66fffff, 8939220e2d7ffff, 8939220e2c3... | [19.742960983412413, 19.742960983412413, 19.74... | [2013-07-01T00:51:15.000000, 2013-07-01T00:51:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | [20000009, 20000009, 20000009, 20000009, 20000... | LINESTRING (-8.60007 41.1827, -8.5982 41.18226... | 660.0 |
4 | 1372641721620000686 | [8939220f02bffff, 8939220f02fffff, 8939220f027... | [12.46393351492334, 12.46393351492334, 12.4639... | [2013-07-01T01:22:01.000000, 2013-07-01T01:22:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, ... | [20000686, 20000686, 20000686, 20000686, 20000... | LINESTRING (-8.61253 41.14594, -8.61068 41.146... | 810.0 |
Getting h3 trajectories with target values
porto_taxi.resolution
9
# get_h3_with_labels() returns three values; the middle one is discarded here
# (presumably a validation split that this example does not use — confirm
# against the srai documentation).
train_h3, _, test_h3 = porto_taxi.get_h3_with_labels()
train_h3
trip_id | h3_sequence | duration | |
---|---|---|---|
0 | 1372636853620000380 | [8939220f383ffff, 8939220f393ffff, 8939220f067... | 360.0 |
1 | 1372636858620000589 | [8939220f063ffff, 8939220f073ffff, 8939220f00b... | 330.0 |
2 | 1372636875620000233 | [8939220f00fffff, 8939220f007ffff, 8939220f017... | 315.0 |
3 | 1372636956620000167 | [8939220f0afffff, 8939220f1dbffff, 8939220f0af... | 465.0 |
4 | 1372636965620000231 | [8939220f067ffff, 8939220f39bffff, 8939220f383... | 375.0 |
... | ... | ... | ... |
358953 | 1404171287620000013 | [8939220e66fffff, 8939220e2d7ffff, 8939220e2c3... | 1485.0 |
358954 | 1404171347620000328 | [8939220f1c7ffff, 8939220f1c3ffff, 8939220f1cf... | 585.0 |
358955 | 1404171590620000307 | [8939220f397ffff, 8939220f14bffff, 8939220f14f... | 1185.0 |
358956 | 1404171854620000114 | [8939220f477ffff, 8939220f473ffff, 8939220f477... | 555.0 |
358957 | 1404172369620000171 | [8939220f03bffff, 8939220f077ffff, 8939220f02b... | 345.0 |
358958 rows × 3 columns
test_h3
trip_id | h3_sequence | duration | |
---|---|---|---|
0 | 1372638160620000456 | [8939220f39bffff, 8939220f067ffff, 8939220f02b... | 435.0 |
1 | 1372638504620000600 | [8939220c493ffff, 8939220e22fffff, 8939220e223... | 945.0 |
2 | 1372639848620000424 | [8939220f477ffff, 8939220f463ffff, 8939220f46b... | 600.0 |
3 | 1372639875620000009 | [8939220e66fffff, 8939220e2d7ffff, 8939220e2c3... | 660.0 |
4 | 1372641721620000686 | [8939220f02bffff, 8939220f02fffff, 8939220f027... | 810.0 |
... | ... | ... | ... |
89735 | 1404170689620000387 | [8939220f083ffff, 8939220f09bffff, 8939220f093... | 450.0 |
89736 | 1404170875620000525 | [8939220f507ffff, 8939220f517ffff, 8939220f513... | 600.0 |
89737 | 1404171367620000670 | [8939220f383ffff, 8939220f39bffff, 8939220f067... | 435.0 |
89738 | 1404171374620000372 | [8939220f6b3ffff, 8939220f6b7ffff, 8939220f46b... | 555.0 |
89739 | 1404171442620000497 | [8939220f08bffff, 8939220f08fffff, 8939220f0bb... | 315.0 |
89740 rows × 3 columns
Get Human Mobility Prediction (HMP) data
ds = porto_taxi.load(version="HMP")
ds["train"].head()
trip_id | avg_speed_per_hex | timestamp | day_type | call_type | taxi_id | geometry | h3_sequence_x | h3_sequence_y | |
---|---|---|---|---|---|---|---|---|---|
0 | 1372636875620000233 | [9.213600054030238, 9.213600054030238, 23.0525... | [2013-07-01T00:01:15.000000, 2013-07-01T00:01:... | [A, A, A, A, A, A, A, A, A, A, A] | [C, C, C, C, C, C, C, C, C, C, C] | [20000233, 20000233, 20000233, 20000233, 20000... | LINESTRING (-8.61989 41.14801, -8.62016 41.147... | [8939220f00fffff, 8939220f007ffff, 8939220f017... | [8939220f1d3ffff, 8939220f1d3ffff] |
1 | 1372636965620000231 | [24.42802472301524, 24.42802472301524, 24.4280... | [2013-07-01T00:02:45.000000, 2013-07-01T00:02:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | [20000231, 20000231, 20000231, 20000231, 20000... | LINESTRING (-8.6155 41.14067, -8.61335 41.1415... | [8939220f067ffff, 8939220f39bffff, 8939220f383... | [8939220c4cbffff, 8939220c4c3ffff, 8939220c4c3... |
2 | 1372637084620000285 | [9.733088667826689, 9.733088667826689, 9.73308... | [2013-07-01T00:04:44.000000, 2013-07-01T00:04:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | [20000285, 20000285, 20000285, 20000285, 20000... | LINESTRING (-8.58568 41.14858, -8.58577 41.148... | [8939220f167ffff, 8939220f12bffff, 8939220f167... | [8939220c633ffff, 8939220c633ffff] |
3 | 1372637247620000067 | [6.5659677388795235, 6.5659677388795235, 34.91... | [2013-07-01T00:07:27.000000, 2013-07-01T00:07:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | [20000067, 20000067, 20000067, 20000067, 20000... | LINESTRING (-8.62886 41.16097, -8.62853 41.160... | [8939220f097ffff, 8939220f083ffff, 8939220f087... | [8939220f023ffff, 8939220f023ffff] |
4 | 1372637610620000497 | [20.531007632411953, 20.531007632411953, 20.53... | [2013-07-01T00:13:30.000000, 2013-07-01T00:13:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, ... | [20000497, 20000497, 20000497, 20000497, 20000... | LINESTRING (-8.58514 41.16486, -8.58415 41.164... | [8939220e26bffff, 8939220f1b7ffff, 8939220f1b3... | [8939220f42bffff, 8939220f42fffff, 8939220f093... |
Creating your own train/test split based on trajectory duration (TTE version) or trajectory length (HMP version).
Downloading version all
Loading without passing a resolution returns trajectories as linestring geometries.
ds = porto_taxi.load(version="all")
ds.keys()
dict_keys(['train'])
ds["train"].head()
trip_id | timestamp | call_type | origin_call | origin_stand | taxi_id | day_type | speed | geometry | |
---|---|---|---|---|---|---|---|---|---|
0 | 1372636853620000380 | [2013-07-01T00:00:53.000000, 2013-07-01T00:01:... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C] | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... | [20000380, 20000380, 20000380, 20000380, 20000... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A] | [1.927607771705634, 1.927607771705634, 43.9689... | LINESTRING (-8.61029 41.14075, -8.61031 41.140... |
1 | 1372636858620000589 | [2013-07-01T00:00:58.000000, 2013-07-01T00:01:... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C] | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... | [20000589, 20000589, 20000589, 20000589, 20000... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A] | [3.0560328387467504, 3.0560328387467504, 49.10... | LINESTRING (-8.61864 41.14141, -8.6185 41.1413... |
2 | 1372636875620000233 | [2013-07-01T00:01:15.000000, 2013-07-01T00:01:... | [C, C, C, C, C, C, C, C, C, C, C] | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... | [20000233, 20000233, 20000233, 20000233, 20000... | [A, A, A, A, A, A, A, A, A, A, A] | [9.213600054030238, 9.213600054030238, 23.0525... | LINESTRING (-8.61989 41.14801, -8.62016 41.147... |
3 | 1372636956620000167 | [2013-07-01T00:02:36.000000, 2013-07-01T00:03:... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... | [20000167, 20000167, 20000167, 20000167, 20000... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [7.325408241610683, 7.325408241610683, 23.4882... | LINESTRING (-8.6133 41.15439, -8.61326 41.1538... |
4 | 1372636965620000231 | [2013-07-01T00:02:45.000000, 2013-07-01T00:03:... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... | [nan, nan, nan, nan, nan, nan, nan, nan, nan, ... | [20000231, 20000231, 20000231, 20000231, 20000... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [24.42802472301524, 24.42802472301524, 70.2825... | LINESTRING (-8.6155 41.14067, -8.61335 41.1415... |
Passing the resolution parameter is necessary to generate trajectories in h3 style.
The `resolution` parameter is required to create h3 sequences from the linestring geometry.
ds = porto_taxi.load(version="all", resolution=10)
ds.keys()
dict_keys(['train'])
ds["train"].head()
trip_id | h3_sequence | avg_speed_per_hex | timestamp | day_type | call_type | taxi_id | geometry | |
---|---|---|---|---|---|---|---|---|
0 | 1372636853620000380 | [8a39220f391ffff, 8a39220f3917fff, 8a39220f064... | [1.927607771705634, 1.927607771705634, 1.92760... | [2013-07-01T00:00:53.000000, 2013-07-01T00:00:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A] | [C, C, C, C, C, C, C, C, C, C, C, C, C, C] | [20000380, 20000380, 20000380, 20000380, 20000... | LINESTRING (-8.61029 41.14075, -8.61031 41.140... |
1 | 1372636858620000589 | [8a39220f062ffff, 8a39220f0627fff, 8a39220f070... | [3.0560328387467504, 3.0560328387467504, 3.056... | [2013-07-01T00:00:58.000000, 2013-07-01T00:00:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A] | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C] | [20000589, 20000589, 20000589, 20000589, 20000... | LINESTRING (-8.61864 41.14141, -8.6185 41.1413... |
2 | 1372636875620000233 | [8a39220f00effff, 8a39220f0057fff, 8a39220f007... | [9.213600054030238, 9.213600054030238, 23.0525... | [2013-07-01T00:01:15.000000, 2013-07-01T00:01:... | [A, A, A, A, A, A, A, A, A, A, A] | [C, C, C, C, C, C, C, C, C, C, C] | [20000233, 20000233, 20000233, 20000233, 20000... | LINESTRING (-8.61989 41.14801, -8.62016 41.147... |
3 | 1372636956620000167 | [8a39220f0aeffff, 8a39220f0acffff, 8a39220f1d9... | [7.325408241610683, 7.325408241610683, 7.32540... | [2013-07-01T00:02:36.000000, 2013-07-01T00:02:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | [20000167, 20000167, 20000167, 20000167, 20000... | LINESTRING (-8.6133 41.15439, -8.61326 41.1538... |
4 | 1372636965620000231 | [8a39220f0657fff, 8a39220f0647fff, 8a39220f064... | [24.42802472301524, 24.42802472301524, 24.4280... | [2013-07-01T00:02:45.000000, 2013-07-01T00:02:... | [A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, ... | [C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, ... | [20000231, 20000231, 20000231, 20000231, 20000... | LINESTRING (-8.6155 41.14067, -8.61335 41.1415... |
# Build a custom train/test split from the dataset loaded above with
# version="all" and resolution=10. test_size=0.2 holds out 20% of the
# trajectories (358960 / 89740 rows in the output below).
# NOTE(review): n_bins presumably sets the number of bins used to stratify the
# split for the chosen task — confirm against the srai train_test_split docs.
train, test = porto_taxi.train_test_split(
target_column="trip_id", task="HMP", test_size=0.2, n_bins=7
)
Created new train_gdf and test_gdf. Train len: 358960, test len: 89740
len(train), len(test)
(358960, 89740)
def visualize_h3_trajectories(
    h3_sequences, map_center=(41.14075, -8.61029), zoom_start=12
):
    """
    Visualize H3 sequences on a Folium map.

    Every trajectory is drawn in a single color taken from a fixed
    seven-color palette; the palette is cycled when there are more
    trajectories than colors. Each distinct cell of a trajectory is drawn
    exactly once, so cells visited repeatedly do not stack several
    semi-transparent polygons on top of each other.

    Args:
        h3_sequences (List[List[str]]): A list of H3 sequences (trajectories).
        map_center (Tuple[float, float]): Center of the map (lat, lon).
        zoom_start (int): Initial zoom level.

    Returns:
        folium.Map: The map with one polygon per distinct cell of each
            trajectory added to it.
    """
    m = folium.Map(location=map_center, zoom_start=zoom_start, tiles="cartodbpositron")
    colors = ["red", "blue", "green", "purple", "orange", "darkred", "lightblue"]

    for i, sequence in enumerate(h3_sequences):
        color = colors[i % len(colors)]
        # Trajectories may revisit a cell (or list it several times in a row).
        # dict.fromkeys drops the repeats while keeping first-visit order, which
        # avoids redundant polygons and uneven fill opacity.
        for h3_id in dict.fromkeys(sequence):
            # h3 v4 returns the boundary as (lat, lng) vertices, the order
            # folium.Polygon expects for `locations`.
            boundary = h3.cell_to_boundary(h3_id)
            folium.Polygon(
                locations=boundary, color=color, weight=2, fill=True, fill_opacity=0.3
            ).add_to(m)

    return m
# Collect the H3 sequence of every trajectory in the custom train split.
h3_sequences = train["h3_sequence"].tolist()
# Render only the first 10 trajectories to keep the map fast to build and display.
map_ = visualize_h3_trajectories(h3_sequences[0:10])  # visualize first 10 for speed
display(map_)