Skip to content

GeoVexEmbedder

srai.embedders.GeoVexEmbedder(
    target_features,
    batch_size=32,
    neighbourhood_radius=4,
    convolutional_layers=2,
    embedding_size=32,
    convolutional_layer_size=256,
)

Bases: CountEmbedder

GeoVex Embedder.

PARAMETER DESCRIPTION
target_features

The features that are to be used in the embedding. Should be in "flat" format, i.e. "<super-tag>_<sub-tag>", or use an OsmTagsFilter object.

TYPE: Union[List[str], OsmTagsFilter, GroupedOsmTagsFilter]

batch_size

Batch size. Defaults to 32.

TYPE: int DEFAULT: 32

convolutional_layers

Number of convolutional layers. Defaults to 2.

TYPE: int DEFAULT: 2

neighbourhood_radius

Radius of the neighbourhood. Defaults to 4.

TYPE: int DEFAULT: 4

embedding_size

Size of the embedding. Defaults to 32.

TYPE: int DEFAULT: 32

convolutional_layer_size

Size of the first convolutional layer. Defaults to 256.

TYPE: int DEFAULT: 256

Source code in srai/embedders/geovex/embedder.py
def __init__(
    self,
    target_features: Union[list[str], OsmTagsFilter, GroupedOsmTagsFilter],
    batch_size: Optional[int] = 32,
    neighbourhood_radius: int = 4,
    convolutional_layers: int = 2,
    embedding_size: int = 32,
    convolutional_layer_size: int = 256,
) -> None:
    """
    Initialize GeoVex Embedder.

    No model is built here: the model slot starts as None and the embedder
    is flagged as not fitted.

    Args:
        target_features (Union[List[str], OsmTagsFilter, GroupedOsmTagsFilter]): The features
            that are to be used in the embedding. Should be in "flat" format,
            i.e. "<super-tag>_<sub-tag>", or use OsmTagsFilter object.
        batch_size (Optional[int], optional): Batch size. Defaults to 32.
        neighbourhood_radius (int, optional): Radius of the neighbourhood. Defaults to 4.
        convolutional_layers (int, optional): Number of convolutional layers. Defaults to 2.
        embedding_size (int, optional): Size of the embedding. Defaults to 32.
        convolutional_layer_size (int, optional): Size of the first convolutional layer.
            Defaults to 256.
    """
    # Runs before any other setup; presumably raises when the optional
    # "torch" dependency group is not installed - confirm in the helper.
    import_optional_dependencies(
        dependency_group="torch", modules=["torch", "pytorch_lightning"]
    )

    super().__init__(
        expected_output_features=target_features,
    )

    # Checked against `self.expected_output_features` (set up by the parent
    # initializer), not against the raw `target_features` argument. The exact
    # rule lives in `_assert_feature_length`.
    self._assert_feature_length(self.expected_output_features, convolutional_layer_size)

    self._model: Optional[GeoVexModel] = None
    self._is_fitted = False
    self._r = neighbourhood_radius
    self._embedding_size = embedding_size
    self._convolutional_layers = convolutional_layers
    self._convolutional_layer_size = convolutional_layer_size

    self._batch_size = batch_size

    # save invalid h3s for later
    self._invalid_cells: list[str] = []
    # Starts empty, so the annotation has to admit None.
    # NOTE(review): `fit` and `transform` store the third value returned by
    # `_prepare_dataset` here - confirm that it really is a DataLoader.
    self._dataset: Optional[DataLoader] = None

invalid_cells: list[str] property

List of invalid h3s.

transform(regions_gdf, features_gdf, joint_gdf)

Create region embeddings.

PARAMETER DESCRIPTION
regions_gdf

Region indexes and geometries.

TYPE: GeoDataFrame

features_gdf

Feature indexes, geometries and feature values.

TYPE: GeoDataFrame

joint_gdf

Joiner result with region-feature multi-index.

TYPE: GeoDataFrame

RETURNS DESCRIPTION
DataFrame

pd.DataFrame: Region embeddings.

Source code in srai/embedders/geovex/embedder.py
def transform(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    joint_gdf: gpd.GeoDataFrame,
) -> pd.DataFrame:
    """
    Create region embeddings.

    The fitted state is verified first through `_check_is_fitted`. An
    H3Neighbourhood is then built from `regions_gdf`, the data is prepared
    without shuffling, and the prepared dataset replaces the one stored on
    the embedder.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        joint_gdf (gpd.GeoDataFrame): Joiner result with region-feature multi-index.

    Returns:
        pd.DataFrame: Region embeddings.
    """
    self._check_is_fitted()

    h3_neighbourhood = H3Neighbourhood(regions_gdf=regions_gdf)

    # The first returned value is not needed when transforming.
    _, loader, prepared_dataset = self._prepare_dataset(
        regions_gdf,
        features_gdf,
        joint_gdf,
        h3_neighbourhood,
        self._batch_size,
        shuffle=False,
    )
    self._dataset = prepared_dataset

    return self._transform(dataset=self._dataset, dataloader=loader)

fit(
    regions_gdf,
    features_gdf,
    joint_gdf,
    neighbourhood,
    learning_rate=0.001,
    trainer_kwargs=None,
)

Fit the model to the data.

PARAMETER DESCRIPTION
regions_gdf

Region indexes and geometries.

TYPE: GeoDataFrame

features_gdf

Feature indexes, geometries and feature values.

TYPE: GeoDataFrame

joint_gdf

Joiner result with region-feature multi-index.

TYPE: GeoDataFrame

neighbourhood

The neighbourhood to use. Should be initialized with the same regions.

TYPE: H3Neighbourhood

learning_rate

Learning rate. Defaults to 0.001.

TYPE: float DEFAULT: 0.001

trainer_kwargs

Trainer kwargs. This is where the number of epochs can be set. Defaults to None.

TYPE: Optional[Dict[str, Any]] DEFAULT: None

Source code in srai/embedders/geovex/embedder.py
def fit(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    joint_gdf: gpd.GeoDataFrame,
    neighbourhood: H3Neighbourhood,
    learning_rate: float = 0.001,
    trainer_kwargs: Optional[dict[str, Any]] = None,
) -> None:
    """
    Fit the model to the data.

    The data is prepared with shuffling enabled, the model is set up through
    `_prepare_model`, and training runs with a `pytorch_lightning.Trainer`.
    Afterwards the embedder is flagged as fitted and keeps the prepared dataset.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        joint_gdf (gpd.GeoDataFrame): Joiner result with region-feature multi-index.
        neighbourhood (H3Neighbourhood): The neighbourhood to use.
            Should be initialized with the same regions.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.
        trainer_kwargs (Optional[Dict[str, Any]], optional): Trainer kwargs.
            This is where the number of epochs can be set. Defaults to None.
    """
    # Imported inside the method, so the package is only needed once fit is called.
    import pytorch_lightning as pl

    resolved_trainer_kwargs = self._prepare_trainer_kwargs(trainer_kwargs)
    counts_df, train_loader, train_dataset = self._prepare_dataset(  # type: ignore
        regions_gdf,
        features_gdf,
        joint_gdf,
        neighbourhood,
        self._batch_size,
        shuffle=True,
    )

    self._prepare_model(counts_df, learning_rate)

    pl.Trainer(**resolved_trainer_kwargs).fit(self._model, train_loader)

    # Only reached when training finished without raising.
    self._dataset = train_dataset
    self._is_fitted = True

fit_transform(
    regions_gdf,
    features_gdf,
    joint_gdf,
    neighbourhood,
    learning_rate=0.001,
    trainer_kwargs=None,
)

Fit the model to the data and create region embeddings.

PARAMETER DESCRIPTION
regions_gdf

Region indexes and geometries.

TYPE: GeoDataFrame

features_gdf

Feature indexes, geometries and feature values.

TYPE: GeoDataFrame

joint_gdf

Joiner result with region-feature multi-index.

TYPE: GeoDataFrame

neighbourhood

The neighbourhood to use. Should be initialized with the same regions.

TYPE: H3Neighbourhood

negative_sample_k_distance

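Distance of negative samples. Defaults to 2. Note: this parameter does not appear in the fit_transform signature shown on this page; the entry seems to be a leftover in the docstring.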

TYPE: int

learning_rate

Learning rate. Defaults to 0.001.

TYPE: float DEFAULT: 0.001

trainer_kwargs

Trainer kwargs. This is where the number of epochs can be set. Defaults to None.

TYPE: Optional[Dict[str, Any]] DEFAULT: None

Source code in srai/embedders/geovex/embedder.py
def fit_transform(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    joint_gdf: gpd.GeoDataFrame,
    neighbourhood: H3Neighbourhood,
    learning_rate: float = 0.001,
    trainer_kwargs: Optional[dict[str, Any]] = None,
) -> pd.DataFrame:
    """
    Fit the model to the data and create region embeddings.

    Calls `fit` with the given arguments and then embeds the dataset that
    `fit` stored on the embedder.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        joint_gdf (gpd.GeoDataFrame): Joiner result with region-feature multi-index.
        neighbourhood (H3Neighbourhood): The neighbourhood to use.
            Should be initialized with the same regions.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.
        trainer_kwargs (Optional[Dict[str, Any]], optional): Trainer kwargs. This is where the
            number of epochs can be set. Defaults to None.

    Returns:
        pd.DataFrame: Region embeddings.
    """
    self.fit(
        regions_gdf=regions_gdf,
        features_gdf=features_gdf,
        joint_gdf=joint_gdf,
        neighbourhood=neighbourhood,
        learning_rate=learning_rate,
        trainer_kwargs=trainer_kwargs,
    )
    assert self._dataset is not None  # for mypy
    return self._transform(dataset=self._dataset)