Skip to content

Index

GeoVex.

HexagonalDataset(data, neighbourhood, neighbor_k_ring=6)

Bases: Dataset['torch.Tensor'], Generic[T]

Dataset for the hexagonal encoder model.

It works by returning a 3d tensor of hexagonal regions. The tensor is a cube with the target hexagonal region in the center, and the rings of neighbors surrounding it.

PARAMETER DESCRIPTION
data

Data to use for training. Raw counts of features in regions.

TYPE: DataFrame

neighbourhood

H3Neighbourhood to use for training. It has to be initialized with the same data as the data argument.

TYPE: H3Neighbourhood

neighbor_k_ring

The hexagonal rings of neighbors to include in the tensor. Defaults to 6.

TYPE: int DEFAULT: 6

Source code in srai/embedders/geovex/dataset.py
def __init__(
    self,
    data: pd.DataFrame,
    neighbourhood: H3Neighbourhood,
    neighbor_k_ring: int = 6,
):
    """
    Build the dataset from per-region feature counts.

    Args:
        data (pd.DataFrame): Data to use for training. Raw counts of features in regions.
        neighbourhood (H3Neighbourhood): H3Neighbourhood to use for training.
            It has to be initialized with the same data as the data argument.
        neighbor_k_ring (int, optional): The hexagonal rings of neighbors to include
            in the tensor. Defaults to 6.
    """
    import_optional_dependencies(dependency_group="torch", modules=["torch"])
    import torch

    # Both arguments are checked before any state is stored on the instance.
    self._assert_k_ring_correct(neighbor_k_ring)
    self._assert_h3_neighbourhood(neighbourhood)

    self._k: int = neighbor_k_ring  # requested number of rings
    self._N: int = data.shape[1]  # number of columns in the dataset
    # Starts empty; overwritten with the separation result at the end.
    self._valid_cells: list[CellInfo] = []

    # Keep the counts as a float32 torch tensor.
    counts = data.to_numpy(dtype=np.float32)
    self._data_torch = torch.Tensor(counts)

    # Split the cells into invalid and valid ones (per the original comment,
    # valid cells have all their neighbors in the dataset).
    separated_cells = self._seperate_valid_invalid_cells(
        data,
        neighbourhood,
        neighbor_k_ring,
        set(data.index),
    )
    self._invalid_cells, self._valid_cells = separated_cells

__len__()

Returns the number of valid h3 indices in the dataset.

RETURNS DESCRIPTION
int

Number of valid h3 indices in the dataset.

TYPE: int

Source code in srai/embedders/geovex/dataset.py
def __len__(self) -> int:
    """
    Count the valid h3 indices held by the dataset.

    Returns:
        int: Length of the stored list of valid cells.
    """
    valid_cells = self._valid_cells
    return len(valid_cells)

__getitem__(index)

Return a single item from the dataset.

PARAMETER DESCRIPTION
index

The index of dataset item to return

TYPE: Any

RETURNS DESCRIPTION
HexagonalDatasetItem

The dataset item

TYPE: Tensor

Source code in srai/embedders/geovex/dataset.py
def __getitem__(self, index: Any) -> "torch.Tensor":
    """
    Build and return the tensor for one valid cell.

    Args:
        index (Any): Position of the item within the list of valid cells.

    Returns:
        torch.Tensor: Whatever `_build_tensor` produces for the selected
            cell's target index and neighbor indices.
    """
    selected_cell = self._valid_cells[index]
    # Each entry holds three values; the first one (the h3 index) is not needed here.
    _h3_index, target_position, neighbor_positions = selected_cell
    return self._build_tensor(target_position, neighbor_positions)

get_valid_cells()

Returns the list of valid h3 indices in the dataset.

RETURNS DESCRIPTION
list[str]

List[str]: List of valid h3 indices in the dataset.

Source code in srai/embedders/geovex/dataset.py
def get_valid_cells(self) -> list[str]:
    """
    Collect the h3 index of every valid cell in the dataset.

    Returns:
        List[str]: The first element of each stored valid-cell entry, in order.
    """
    valid_entries = self._valid_cells
    return [cell_id for cell_id, _target_idx, _neighbor_idxs in valid_entries]

get_invalid_cells()

Returns the list of invalid h3 indices in the dataset.

RETURNS DESCRIPTION
list[str]

List[str]: List of invalid h3 indices in the dataset.

Source code in srai/embedders/geovex/dataset.py
def get_invalid_cells(self) -> list[str]:
    """
    Return the invalid h3 indices as a fresh list.

    Returns:
        List[str]: A new list built from the stored invalid cells.
    """
    return [*self._invalid_cells]

GeoVexEmbedder(
    target_features,
    batch_size=32,
    neighbourhood_radius=4,
    convolutional_layers=2,
    embedding_size=32,
    convolutional_layer_size=256,
)

Bases: CountEmbedder

GeoVex Embedder.

PARAMETER DESCRIPTION
target_features

The features that are to be used in the embedding. Should be in "flat" format, i.e. "<super-tag>_<sub-tag>", or use OsmTagsFilter object.

TYPE: Union[List[str], OsmTagsFilter, GroupedOsmTagsFilter]

batch_size

Batch size. Defaults to 32.

TYPE: int DEFAULT: 32

convolutional_layers

Number of convolutional layers. Defaults to 2.

TYPE: int DEFAULT: 2

neighbourhood_radius

Radius of the neighbourhood. Defaults to 4.

TYPE: int DEFAULT: 4

embedding_size

Size of the embedding. Defaults to 32.

TYPE: int DEFAULT: 32

convolutional_layer_size

Size of the first convolutional layer.

TYPE: int DEFAULT: 256

Source code in srai/embedders/geovex/embedder.py
def __init__(
    self,
    target_features: Union[list[str], OsmTagsFilter, GroupedOsmTagsFilter],
    batch_size: Optional[int] = 32,
    neighbourhood_radius: int = 4,
    convolutional_layers: int = 2,
    embedding_size: int = 32,
    convolutional_layer_size: int = 256,
) -> None:
    """
    Set up the GeoVex Embedder; the model itself starts out as None.

    Args:
        target_features (Union[List[str], OsmTagsFilter, GroupedOsmTagsFilter]): The features
            that are to be used in the embedding. Should be in "flat" format,
            i.e. "<super-tag>_<sub-tag>", or use OsmTagsFilter object.
        batch_size (int, optional): Batch size. Defaults to 32.
        neighbourhood_radius (int, optional): Radius of the neighbourhood. Defaults to 4.
        convolutional_layers (int, optional): Number of convolutional layers. Defaults to 2.
        embedding_size (int, optional): Size of the embedding. Defaults to 32.
        convolutional_layer_size (int, optional): Size of the first convolutional layer.
            Defaults to 256.
    """
    required_modules = ["torch", "pytorch_lightning"]
    import_optional_dependencies(dependency_group="torch", modules=required_modules)

    super().__init__(expected_output_features=target_features)

    # Check the resolved feature list against the first convolutional layer size.
    self._assert_feature_length(self.expected_output_features, convolutional_layer_size)

    # Nothing is trained yet.
    self._model: Optional[GeoVexModel] = None
    self._is_fitted = False

    # Hyperparameters, kept so they can be reused and saved later.
    self._r = neighbourhood_radius
    self._embedding_size = embedding_size
    self._convolutional_layers = convolutional_layers
    self._convolutional_layer_size = convolutional_layer_size
    self._batch_size = batch_size

    # save invalid h3s for later
    self._invalid_cells: list[str] = []
    # NOTE(review): annotated as DataLoader but initialised to None, and the
    # value stored here later is the dataset returned by `_prepare_dataset`
    # - confirm the intended annotation.
    self._dataset: DataLoader = None

invalid_cells: list[str] property

List of invalid h3s.

transform(regions_gdf, features_gdf, joint_gdf)

Create region embeddings.

PARAMETER DESCRIPTION
regions_gdf

Region indexes and geometries.

TYPE: GeoDataFrame

features_gdf

Feature indexes, geometries and feature values.

TYPE: GeoDataFrame

joint_gdf

Joiner result with region-feature multi-index.

TYPE: GeoDataFrame

RETURNS DESCRIPTION
DataFrame

pd.DataFrame: Region embeddings.

Source code in srai/embedders/geovex/embedder.py
def transform(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    joint_gdf: gpd.GeoDataFrame,
) -> pd.DataFrame:
    """
    Produce embeddings for the given regions with the fitted model.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        joint_gdf (gpd.GeoDataFrame): Joiner result with region-feature multi-index.

    Returns:
        pd.DataFrame: Region embeddings.
    """
    self._check_is_fitted()

    # The neighbourhood is built here from the regions passed in.
    regions_neighbourhood = H3Neighbourhood(regions_gdf=regions_gdf)

    # The first returned value is unused here; data order is kept (shuffle=False).
    _unused, batches, prepared_dataset = self._prepare_dataset(
        regions_gdf,
        features_gdf,
        joint_gdf,
        regions_neighbourhood,
        self._batch_size,
        shuffle=False,
    )
    self._dataset = prepared_dataset

    return self._transform(dataset=self._dataset, dataloader=batches)

fit(
    regions_gdf,
    features_gdf,
    joint_gdf,
    neighbourhood,
    learning_rate=0.001,
    trainer_kwargs=None,
)

Fit the model to the data.

PARAMETER DESCRIPTION
regions_gdf

Region indexes and geometries.

TYPE: GeoDataFrame

features_gdf

Feature indexes, geometries and feature values.

TYPE: GeoDataFrame

joint_gdf

Joiner result with region-feature multi-index.

TYPE: GeoDataFrame

neighbourhood

The neighbourhood to use. Should be initialized with the same regions.

TYPE: H3Neighbourhood

learning_rate

Learning rate. Defaults to 0.001.

TYPE: float DEFAULT: 0.001

trainer_kwargs

Trainer kwargs. This is where the number of epochs can be set. Defaults to None.

TYPE: Optional[Dict[str, Any]] DEFAULT: None

Source code in srai/embedders/geovex/embedder.py
def fit(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    joint_gdf: gpd.GeoDataFrame,
    neighbourhood: H3Neighbourhood,
    learning_rate: float = 0.001,
    trainer_kwargs: Optional[dict[str, Any]] = None,
) -> None:
    """
    Train the model on the given data.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        joint_gdf (gpd.GeoDataFrame): Joiner result with region-feature multi-index.
        neighbourhood (H3Neighbourhood): The neighbourhood to use.
            Should be initialized with the same regions.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.
        trainer_kwargs (Optional[Dict[str, Any]], optional): Trainer kwargs.
            This is where the number of epochs can be set. Defaults to None.
    """
    import pytorch_lightning as pl

    trainer_kwargs = self._prepare_trainer_kwargs(trainer_kwargs)

    # Training batches are shuffled.
    counts_df, train_loader, train_dataset = self._prepare_dataset(  # type: ignore
        regions_gdf,
        features_gdf,
        joint_gdf,
        neighbourhood,
        self._batch_size,
        shuffle=True,
    )

    self._prepare_model(counts_df, learning_rate)

    lightning_trainer = pl.Trainer(**trainer_kwargs)
    lightning_trainer.fit(self._model, train_loader)

    # Mark as fitted and keep the dataset only after training has returned.
    self._is_fitted = True
    self._dataset = train_dataset

fit_transform(
    regions_gdf,
    features_gdf,
    joint_gdf,
    neighbourhood,
    learning_rate=0.001,
    trainer_kwargs=None,
)

Fit the model to the data and create region embeddings.

PARAMETER DESCRIPTION
regions_gdf

Region indexes and geometries.

TYPE: GeoDataFrame

features_gdf

Feature indexes, geometries and feature values.

TYPE: GeoDataFrame

joint_gdf

Joiner result with region-feature multi-index.

TYPE: GeoDataFrame

neighbourhood

The neighbourhood to use. Should be initialized with the same regions.

TYPE: H3Neighbourhood

negative_sample_k_distance

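Distance of negative samples. Defaults to 2. (Note: this parameter is listed in the docstring only; the `fit_transform` signature does not accept it.)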

TYPE: int

learning_rate

Learning rate. Defaults to 0.001.

TYPE: float DEFAULT: 0.001

trainer_kwargs

Trainer kwargs. This is where the number of epochs can be set. Defaults to None.

TYPE: Optional[Dict[str, Any]] DEFAULT: None

Source code in srai/embedders/geovex/embedder.py
def fit_transform(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    joint_gdf: gpd.GeoDataFrame,
    neighbourhood: H3Neighbourhood,
    learning_rate: float = 0.001,
    trainer_kwargs: Optional[dict[str, Any]] = None,
) -> pd.DataFrame:
    """
    Train the model, then embed the data it was trained on.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        joint_gdf (gpd.GeoDataFrame): Joiner result with region-feature multi-index.
        neighbourhood (H3Neighbourhood): The neighbourhood to use.
            Should be initialized with the same regions.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.
        trainer_kwargs (Optional[Dict[str, Any]], optional): Trainer kwargs. This is where the
            number of epochs can be set. Defaults to None.

    Returns:
        pd.DataFrame: The result of `_transform` on the dataset stored by `fit`.
    """
    # Every argument is forwarded to fit() unchanged, by keyword.
    self.fit(
        regions_gdf=regions_gdf,
        features_gdf=features_gdf,
        joint_gdf=joint_gdf,
        neighbourhood=neighbourhood,
        learning_rate=learning_rate,
        trainer_kwargs=trainer_kwargs,
    )

    # fit() stores the dataset it trained on; the assert narrows the type.
    assert self._dataset is not None  # for mypy
    fitted_dataset = self._dataset
    return self._transform(dataset=fitted_dataset)

save(path)

Save the model to a directory.

PARAMETER DESCRIPTION
path

Path to the directory.

TYPE: Union[str, Any]

Source code in srai/embedders/geovex/embedder.py
def save(self, path: Union[str, Any]) -> None:
    """
    Save the embedder by handing its constructor config to `_save`.

    Args:
        path (Union[str, Any]): Path to the directory.
    """
    # The keys below mirror the constructor's parameter names:
    #   target_features, batch_size, neighbourhood_radius,
    #   convolutional_layers, embedding_size, convolutional_layer_size
    features_json = self.expected_output_features.to_json()
    embedder_config = {
        "target_features": features_json,
        "batch_size": self._batch_size,
        "neighbourhood_radius": self._r,
        "convolutional_layers": self._convolutional_layers,
        "embedding_size": self._embedding_size,
        "convolutional_layer_size": self._convolutional_layer_size,
    }
    self._save(path, embedder_config)

load(path)

classmethod

Load the model from a directory.

PARAMETER DESCRIPTION
path

Path to the directory.

TYPE: Union[Path, str]

model_module

Model class. (Note: `load` takes only `path`; the model class passed on internally is always `GeoVexModel`.)

TYPE: type[ModelT]

RETURNS DESCRIPTION
GeoVexEmbedder

GeoVexEmbedder object.

TYPE: GeoVexEmbedder

Source code in srai/embedders/geovex/embedder.py
@classmethod
def load(cls, path: Union[Path, str]) -> "GeoVexEmbedder":
    """
    Restore an embedder from a directory.

    The model class handed to `_load` is always `GeoVexModel`.

    Args:
        path (Union[Path, str]): Path to the directory.

    Returns:
        GeoVexEmbedder: GeoVexEmbedder object.
    """
    restored_embedder = cls._load(path, GeoVexModel)
    return restored_embedder

GeoVexModel(
    k_dim,
    radius,
    conv_layers=2,
    emb_size=32,
    learning_rate=1e-05,
    conv_layer_size=256,
)

Bases: Model

GeoVeX Model.

This class implements the GeoVeX model. It is based on a convolutional autoencoder with a Zero-Inflated Poisson layer. The model is described in [1]. It takes a 3d tensor as input (counts of features per region) and outputs dense embeddings. The 3d tensor consists of the target region at the center and radius R neighbors around it.

PARAMETER DESCRIPTION
k_dim

the number of input channels

TYPE: int

radius

the radius of the hexagonal region

TYPE: int

conv_layers

The number of convolutional layers. Defaults to 2.

TYPE: int DEFAULT: 2

emb_size

The dimension of the inner embedding. Defaults to 32.

TYPE: int DEFAULT: 32

learning_rate

The learning rate. Defaults to 1e-5.

TYPE: float DEFAULT: 1e-05

conv_layer_size

The size of the initial convolutional layer.

TYPE: int DEFAULT: 256

Source code in srai/embedders/geovex/model.py
def __init__(
    self,
    k_dim: int,
    radius: int,
    conv_layers: int = 2,
    emb_size: int = 32,
    learning_rate: float = 1e-5,
    conv_layer_size: int = 256,
):
    """
    Initialize the GeoVeX model.

    Validates the arguments, stores the hyperparameters and builds the
    encoder, the decoder and the loss.

    Args:
        k_dim (int): the number of input channels
        radius (int): the radius of the hexagonal region
        conv_layers (int, optional): The number of convolutional layers. Defaults to 2.
        emb_size (int, optional): The dimension of the inner embedding. Defaults to 32.
        learning_rate (float, optional): The learning rate. Defaults to 1e-5.
        conv_layer_size (int, optional): The size of the initial convolutional layer.
            Each following layer doubles it. Defaults to 256.

    Raises:
        ValueError: If k_dim is smaller than conv_layer_size, if conv_layers
            is smaller than 2, or if radius is smaller than 2.
    """
    # NOTE(review): the check accepts k_dim == conv_layer_size, while the
    # message says "greater than" - the wording is stricter than the condition.
    if k_dim < conv_layer_size:
        raise ValueError(f"k_dim must be greater than {conv_layer_size}")

    if conv_layers < 2:
        raise ValueError("conv_layers must be greater than 1")

    if radius < 2:
        raise ValueError("R must be greater than 1")

    import_optional_dependencies(
        dependency_group="torch", modules=["torch", "pytorch_lightning"]
    )
    from torch import nn

    super().__init__()

    # hyperparameters are kept on the instance (get_config reads them back)
    self.k_dim = k_dim

    self.R = radius
    self.lr = learning_rate
    self.emb_size = emb_size
    self.conv_layer_size = conv_layer_size
    self.conv_layers = conv_layers

    # input size is 2R + 2
    # NOTE(review): that figure comes from get_shape, which is defined
    # elsewhere - confirm it against that helper.
    self.M = get_shape(self.R)

    # calculate the padding to preserve the input size
    #  equation for output size with stride is
    #  out_size = (in_size - kernel_size + padding + stride) / stride
    #  Setting out_size == in_size == M and solving for padding gives
    #  (stride - 1) * M - stride + kernel_size; it is halved below
    #  (presumably because the formula's padding is the total over both
    #  sides - TODO confirm) and rounded up.
    stride = 2
    kernel_size = 3
    padding = math.ceil(((stride - 1) * self.M - stride + kernel_size) / 2)

    # find the size of the linear layer
    # equation is the output size of the conv layers
    # The first conv layer uses `padding` above, so only the remaining
    # conv_layers - 1 layers are walked through here. conv_layers >= 2 is
    # enforced above, so the loop runs at least once and out_size is always bound.
    in_size = self.M
    ll_padding = 0 if self.R < 5 else 1
    for _ in range(conv_layers - 1):
        out_size = math.floor(
            (in_size - kernel_size + ll_padding + stride) / stride,
        )
        in_size = out_size

    # channel count per conv layer: conv_layer_size, doubled at every layer
    conv_sizes = [conv_layer_size * 2**i for i in range(conv_layers)]
    self.encoder = nn.Sequential(
        nn.BatchNorm2d(self.k_dim),
        nn.ReLU(),
        # have to add padding to preserve the input size
        HexagonalConv2d(self.k_dim, conv_sizes[0], kernel_size=3, stride=2, padding=padding),
        # conv_layer_size equals conv_sizes[0] (2**0 == 1)
        nn.BatchNorm2d(conv_layer_size),
        nn.ReLU(),
        *(
            # second and any later conv blocks (one per i in 1..conv_layers - 1)
            nn.Sequential(
                HexagonalConv2d(
                    conv_sizes[i - 1],
                    conv_sizes[i],
                    kernel_size=3,
                    stride=2,
                    padding=ll_padding,
                ),
                nn.BatchNorm2d(conv_sizes[i]),
                nn.ReLU(),
            )
            for i in range(1, conv_layers)
        ),
        # flatten
        nn.Flatten(),
        # project the flattened feature map down to the embedding
        nn.Linear(out_size**2 * conv_sizes[-1], self.emb_size),
    )

    self.decoder = nn.Sequential(
        # mirror of the encoder's final linear layer
        nn.Linear(self.emb_size, out_size**2 * conv_sizes[-1]),
        # maintain the batch size, but reshape the rest
        Reshape((-1, conv_sizes[-1], out_size, out_size)),
        # decoder has conv transpose layers - 1,
        # as the reshape layer is the first "transpose layer"
        *(
            # walk conv_sizes from the end: each block maps one size to the previous one
            nn.Sequential(
                HexagonalConvTranspose2d(
                    conv_sizes[-1 * (i + 1)],
                    conv_sizes[-1 * (i + 2)],
                    kernel_size=3,
                    stride=2,
                    output_padding=1,
                    padding=ll_padding,
                ),
                nn.BatchNorm2d(conv_sizes[-1 * (i + 2)]),
                nn.ReLU(),
            )
            for i in range(conv_layers - 1)
        ),
        # final layer; forward() reads two values from the decoder output
        GeoVeXZIP(conv_sizes[0], self.M, self.k_dim),
    )

    self._loss = GeoVeXLoss(self.R)

save(path)

Save the model to a directory.

PARAMETER DESCRIPTION
path

Path to the directory.

TYPE: Path

Source code in srai/embedders/_base.py
def save(self, path: Union[Path, str]) -> None:
    """
    Write the model's state dict to `path` with `torch.save`.

    Args:
        path (Union[Path, str]): Destination handed to `torch.save` as-is.
    """
    import torch

    model_state = self.state_dict()
    torch.save(model_state, path)

load(path, **kwargs)

classmethod

Load model from a file.

PARAMETER DESCRIPTION
path

Path to the file.

TYPE: Union[Path, str]

**kwargs

Additional kwargs to pass to the model constructor.

TYPE: dict DEFAULT: {}

Source code in srai/embedders/_base.py
@classmethod
def load(cls, path: Union[Path, str], **kwargs: Any) -> "Model":
    """
    Load model from a file.

    A new model is constructed from `kwargs` and the state dict stored at
    `path` is loaded into it.

    Args:
        path (Union[Path, str]): Path to the file.
        **kwargs (dict): Additional kwargs to pass to the model constructor.

    Returns:
        Model: The newly constructed model with the saved state loaded.
    """
    import torch

    if isinstance(path, str):
        path = Path(path)

    model = cls(**kwargs)
    # Map the saved tensors to the CPU while reading: without map_location,
    # torch.load restores each tensor to the device it was saved from, which
    # fails for a checkpoint written on a GPU machine and read on a CPU-only
    # one. load_state_dict copies the values into the model's own parameters,
    # so the model's device is unaffected.
    state_dict = torch.load(path, map_location="cpu")
    model.load_state_dict(state_dict)
    return model

forward(x)

Forward pass of the GeoVeX model.

PARAMETER DESCRIPTION
x

The input tensor. The dimensions are (batch_size, k_dim, R * 2 + 1, R * 2 + 1).

TYPE: Tensor

RETURNS DESCRIPTION
tuple[Tensor, Tensor]

torch.Tensor: The output tensor.

Source code in srai/embedders/geovex/model.py
def forward(self, x: "torch.Tensor") -> tuple["torch.Tensor", "torch.Tensor"]:
    """
    Run the input through the encoder and then the decoder.

    Args:
        x (torch.Tensor): The input tensor. The dimensions are
            (batch_size, k_dim, R * 2 + 1, R * 2 + 1).

    Returns:
        tuple[torch.Tensor, torch.Tensor]: The first two elements of the
            decoder output.
    """
    embedding = self.encoder(x)
    decoded = self.decoder(embedding)
    first_output, second_output = decoded[0], decoded[1]
    return first_output, second_output

training_step(batch, batch_idx)

Perform a training step. This is called by PyTorch Lightning.

One training step consists of a forward pass, a loss calculation, and a backward pass.

PARAMETER DESCRIPTION
batch

The batch of data.

TYPE: List[Tensor]

batch_idx

The index of the batch.

TYPE: int

RETURNS DESCRIPTION
Tensor

torch.Tensor: The loss value.

Source code in srai/embedders/geovex/model.py
def training_step(self, batch: list["torch.Tensor"], batch_idx: int) -> "torch.Tensor":
    # sourcery skip: class-extract-method
    """
    Compute and log the training loss for one batch. Called by PyTorch Lightning.

    The batch goes through the forward pass, and the loss is computed from
    the forward outputs with the batch itself as the last argument.

    Args:
        batch (List[torch.Tensor]): The batch of data.
        batch_idx (int): The index of the batch.

    Returns:
        torch.Tensor: The loss value.
    """
    model_outputs = self.forward(batch)
    train_loss = self._loss.forward(*model_outputs, batch)
    # Logged for every step and also for the epoch.
    self.log("train_loss", train_loss, on_step=True, on_epoch=True)
    return train_loss

validation_step(batch, batch_idx)

Perform a validation step. This is called by PyTorch Lightning.

PARAMETER DESCRIPTION
batch

The batch of data.

TYPE: List[Tensor]

batch_idx

The index of the batch.

TYPE: int

RETURNS DESCRIPTION
Tensor

torch.Tensor: The loss value.

Source code in srai/embedders/geovex/model.py
def validation_step(self, batch: list["torch.Tensor"], batch_idx: int) -> "torch.Tensor":
    """
    Compute and log the validation loss for one batch. Called by PyTorch Lightning.

    Args:
        batch (List[torch.Tensor]): The batch of data.
        batch_idx (int): The index of the batch.

    Returns:
        torch.Tensor: The loss value.
    """
    model_outputs = self.forward(batch)
    validation_loss = self._loss.forward(*model_outputs, batch)
    # Logged for every step and also for the epoch.
    self.log("validation_loss", validation_loss, on_step=True, on_epoch=True)
    return validation_loss

configure_optimizers()

Configure the optimizers. This is called by PyTorch Lightning.

RETURNS DESCRIPTION
list[Optimizer]

List[torch.optim.Optimizer]: The optimizers.

Source code in srai/embedders/geovex/model.py
def configure_optimizers(self) -> list["torch.optim.Optimizer"]:
    """
    Create the optimizer for the model. Called by PyTorch Lightning.

    Returns:
        List[torch.optim.Optimizer]: A single-element list holding an Adam
            optimizer over the model parameters, using the stored learning rate.
    """
    adam: torch.optim.Optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
    return [adam]

get_config()

Get the model configuration.

RETURNS DESCRIPTION
dict[str, Union[int, float]]

Dict[str, Union[int, float]]: The model configuration.

Source code in srai/embedders/geovex/model.py
def get_config(self) -> dict[str, Union[int, float]]:
    """
    Collect the constructor arguments the model was built with.

    Returns:
        Dict[str, Union[int, float]]: The model configuration, keyed by the
            constructor's parameter names.
    """
    # (config key, attribute holding its value), in constructor order
    key_to_attribute = (
        ("k_dim", "k_dim"),
        ("radius", "R"),
        ("conv_layers", "conv_layers"),
        ("emb_size", "emb_size"),
        ("learning_rate", "lr"),
        ("conv_layer_size", "conv_layer_size"),
    )
    return {key: getattr(self, attribute) for key, attribute in key_to_attribute}