Index

Hex2Vec.

Hex2VecEmbedder(
    encoder_sizes=None, expected_output_features=None
)

¶

Bases: CountEmbedder

Hex2Vec Embedder.

PARAMETER	DESCRIPTION
`encoder_sizes`	Sizes of the encoder layers. The input layer size shouldn't be included - it's inferred from the data. The last element is the embedding size. Defaults to [150, 75, 50]. TYPE: `List[int]` DEFAULT: `None`

Source code in srai/embedders/hex2vec/embedder.py

def __init__(
    self,
    encoder_sizes: Optional[list[int]] = None,
    expected_output_features: Optional[
        Union[list[str], OsmTagsFilter, GroupedOsmTagsFilter]
    ] = None,
) -> None:
    """
    Set up an unfitted Hex2VecEmbedder.

    Args:
        encoder_sizes (List[int], optional): Output size of each encoder layer, in order.
            The input layer size is not listed here - it is derived from the data.
            The final entry is the embedding size. When None,
            Hex2VecEmbedder.DEFAULT_ENCODER_SIZES is used (documented as [150, 75, 50]).
        expected_output_features
            (Union[List[str], OsmTagsFilter, GroupedOsmTagsFilter], optional):
            Expected output features, forwarded to the parent embedder. Defaults to None.
    """
    super().__init__(
        count_subcategories=True, expected_output_features=expected_output_features
    )
    # Check the optional "torch" dependency group (helper is defined elsewhere).
    import_optional_dependencies(
        modules=["torch", "pytorch_lightning"], dependency_group="torch"
    )

    sizes = Hex2VecEmbedder.DEFAULT_ENCODER_SIZES if encoder_sizes is None else encoder_sizes
    self._assert_encoder_sizes_correct(sizes)

    self._encoder_sizes = sizes
    # The model is only created by fit(); until then the embedder is marked unfitted.
    self._model: Optional[Hex2VecModel] = None
    self._is_fitted = False

transform(regions_gdf, features_gdf, joint_gdf)

¶

Create region embeddings.

PARAMETER	DESCRIPTION
`regions_gdf`	Region indexes and geometries. TYPE: `GeoDataFrame`
`features_gdf`	Feature indexes, geometries and feature values. TYPE: `GeoDataFrame`
`joint_gdf`	Joiner result with region-feature multi-index. TYPE: `GeoDataFrame`

RETURNS	DESCRIPTION
`DataFrame`	pd.DataFrame: Embedding and geometry index for each region in regions_gdf.

RAISES	DESCRIPTION
`ValueError`	If features_gdf is empty and self.expected_output_features is not set.
`ValueError`	If any of the gdfs index names is None.
`ValueError`	If joint_gdf.index is not of type pd.MultiIndex or doesn't have 2 levels.
`ValueError`	If index levels in gdfs don't overlap correctly.

Source code in srai/embedders/hex2vec/embedder.py

def transform(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    joint_gdf: gpd.GeoDataFrame,
) -> pd.DataFrame:
    """
    Create region embeddings.

    The raw feature counts of every region are passed through the fitted model.
    The forward pass runs without gradient tracking because this is inference only.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        joint_gdf (gpd.GeoDataFrame): Joiner result with region-feature multi-index.

    Returns:
        pd.DataFrame: Embedding and geometry index for each region in regions_gdf.

    Raises:
        ValueError: If features_gdf is empty and self.expected_output_features is not set.
        ValueError: If any of the gdfs index names is None.
        ValueError: If joint_gdf.index is not of type pd.MultiIndex or doesn't have 2 levels.
        ValueError: If index levels in gdfs don't overlap correctly.
    """
    import torch

    self._check_is_fitted()
    counts_df = self._get_raw_counts(regions_gdf, features_gdf, joint_gdf)
    counts_tensor = torch.from_numpy(counts_df.values)

    # Inference only: disabling autograd avoids building a computation graph
    # that would be thrown away immediately by detach().
    with torch.no_grad():
        embeddings = self._model(counts_tensor).detach().numpy()  # type: ignore

    # Keep the region index of the counts so rows stay aligned with regions.
    return pd.DataFrame(embeddings, index=counts_df.index)

fit(
    regions_gdf,
    features_gdf,
    joint_gdf,
    neighbourhood,
    negative_sample_k_distance=2,
    batch_size=32,
    learning_rate=0.001,
    trainer_kwargs=None,
)

¶

Fit the model to the data.

PARAMETER	DESCRIPTION
`regions_gdf`	Region indexes and geometries. TYPE: `GeoDataFrame`
`features_gdf`	Feature indexes, geometries and feature values. TYPE: `GeoDataFrame`
`joint_gdf`	Joiner result with region-feature multi-index. TYPE: `GeoDataFrame`
`neighbourhood`	The neighbourhood to use. Should be initialized with the same regions. TYPE: `Neighbourhood[T]`
`negative_sample_k_distance`	When sampling negative samples, sample from a distance > k. Defaults to 2. TYPE: `int` DEFAULT: `2`
`batch_size`	Batch size. Defaults to 32. TYPE: `int` DEFAULT: `32`
`learning_rate`	Learning rate. Defaults to 0.001. TYPE: `float` DEFAULT: `0.001`
`trainer_kwargs`	Trainer kwargs. Defaults to None. TYPE: `Optional[Dict[str, Any]]` DEFAULT: `None`

RAISES	DESCRIPTION
`ValueError`	If features_gdf is empty and self.expected_output_features is not set.
`ValueError`	If any of the gdfs index names is None.
`ValueError`	If joint_gdf.index is not of type pd.MultiIndex or doesn't have 2 levels.
`ValueError`	If index levels in gdfs don't overlap correctly.
`ValueError`	If negative_sample_k_distance < 2.

Source code in srai/embedders/hex2vec/embedder.py

def fit(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    joint_gdf: gpd.GeoDataFrame,
    neighbourhood: Neighbourhood[T],
    negative_sample_k_distance: int = 2,
    batch_size: int = 32,
    learning_rate: float = 0.001,
    trainer_kwargs: Optional[dict[str, Any]] = None,
) -> None:
    """
    Train the Hex2Vec model on the given regions and features.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        joint_gdf (gpd.GeoDataFrame): Joiner result with region-feature multi-index.
        neighbourhood (Neighbourhood[T]): The neighbourhood to use.
            Should be initialized with the same regions.
        negative_sample_k_distance (int, optional): When sampling negative samples,
            sample from a distance > k. Defaults to 2.
        batch_size (int, optional): Batch size. Defaults to 32.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.
        trainer_kwargs (Optional[Dict[str, Any]], optional): Keyword arguments for
            the pytorch_lightning Trainer. Defaults to None.

    Raises:
        ValueError: If features_gdf is empty and self.expected_output_features is not set.
        ValueError: If any of the gdfs index names is None.
        ValueError: If joint_gdf.index is not of type pd.MultiIndex or doesn't have 2 levels.
        ValueError: If index levels in gdfs don't overlap correctly.
        ValueError: If negative_sample_k_distance < 2.
    """
    import pytorch_lightning as pl
    from torch.utils.data import DataLoader

    prepared_kwargs = self._prepare_trainer_kwargs(trainer_kwargs)

    counts_df = self._get_raw_counts(regions_gdf, features_gdf, joint_gdf)

    # If no features were requested up front, adopt the columns seen during fitting.
    if self.expected_output_features is None:  # type: ignore[has-type]
        self.expected_output_features = pd.Series(counts_df.columns)

    # The input layer width equals the number of features; encoder sizes follow it.
    input_size = len(self.expected_output_features)  # type: ignore[arg-type]
    layer_sizes = [input_size]
    layer_sizes.extend(self._encoder_sizes)
    self._model = Hex2VecModel(layer_sizes=layer_sizes, learning_rate=learning_rate)

    train_loader = DataLoader(
        NeighbourDataset(counts_df, neighbourhood, negative_sample_k_distance),
        batch_size=batch_size,
        shuffle=True,
    )

    pl.Trainer(**prepared_kwargs).fit(self._model, train_loader)
    self._is_fitted = True

fit_transform(
    regions_gdf,
    features_gdf,
    joint_gdf,
    neighbourhood,
    negative_sample_k_distance=2,
    batch_size=32,
    learning_rate=0.001,
    trainer_kwargs=None,
)

¶

Fit the model to the data and return the embeddings.

PARAMETER	DESCRIPTION
`regions_gdf`	Region indexes and geometries. TYPE: `GeoDataFrame`
`features_gdf`	Feature indexes, geometries and feature values. TYPE: `GeoDataFrame`
`joint_gdf`	Joiner result with region-feature multi-index. TYPE: `GeoDataFrame`
`neighbourhood`	The neighbourhood to use. Should be initialized with the same regions. TYPE: `Neighbourhood[T]`
`negative_sample_k_distance`	When sampling negative samples, sample from a distance > k. Defaults to 2. TYPE: `int` DEFAULT: `2`
`batch_size`	Batch size. Defaults to 32. TYPE: `int` DEFAULT: `32`
`learning_rate`	Learning rate. Defaults to 0.001. TYPE: `float` DEFAULT: `0.001`
`trainer_kwargs`	Trainer kwargs. Defaults to None. TYPE: `Optional[Dict[str, Any]]` DEFAULT: `None`

RETURNS	DESCRIPTION
`DataFrame`	pd.DataFrame: Region embeddings.

RAISES	DESCRIPTION
`ValueError`	If features_gdf is empty and self.expected_output_features is not set.
`ValueError`	If any of the gdfs index names is None.
`ValueError`	If joint_gdf.index is not of type pd.MultiIndex or doesn't have 2 levels.
`ValueError`	If index levels in gdfs don't overlap correctly.
`ValueError`	If negative_sample_k_distance < 2.

Source code in srai/embedders/hex2vec/embedder.py

def fit_transform(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    joint_gdf: gpd.GeoDataFrame,
    neighbourhood: Neighbourhood[T],
    negative_sample_k_distance: int = 2,
    batch_size: int = 32,
    learning_rate: float = 0.001,
    trainer_kwargs: Optional[dict[str, Any]] = None,
) -> pd.DataFrame:
    """
    Train the model, then embed the same regions it was trained on.

    This is fit() followed by transform() on identical inputs.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        joint_gdf (gpd.GeoDataFrame): Joiner result with region-feature multi-index.
        neighbourhood (Neighbourhood[T]): The neighbourhood to use.
            Should be initialized with the same regions.
        negative_sample_k_distance (int, optional): When sampling negative samples,
            sample from a distance > k. Defaults to 2.
        batch_size (int, optional): Batch size. Defaults to 32.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.
        trainer_kwargs (Optional[Dict[str, Any]], optional): Trainer kwargs. Defaults to None.

    Returns:
        pd.DataFrame: Region embeddings.

    Raises:
        ValueError: If features_gdf is empty and self.expected_output_features is not set.
        ValueError: If any of the gdfs index names is None.
        ValueError: If joint_gdf.index is not of type pd.MultiIndex or doesn't have 2 levels.
        ValueError: If index levels in gdfs don't overlap correctly.
        ValueError: If negative_sample_k_distance < 2.
    """
    # Training hyper-parameters are forwarded to fit() unchanged and in order.
    fit_arguments = (
        regions_gdf,
        features_gdf,
        joint_gdf,
        neighbourhood,
        negative_sample_k_distance,
        batch_size,
        learning_rate,
        trainer_kwargs,
    )
    self.fit(*fit_arguments)

    region_embeddings = self.transform(regions_gdf, features_gdf, joint_gdf)
    return region_embeddings

save(path)

¶

Save the model to a directory.

PARAMETER	DESCRIPTION
`path`	Path to the directory. TYPE: `Path`

Source code in srai/embedders/hex2vec/embedder.py

def save(self, path: Union[Path, str]) -> None:
    """
    Persist the embedder under the given path.

    The embedder configuration (encoder sizes and expected output features)
    is assembled here and handed to self._save together with the path.

    Args:
        path (Path): Path to the directory.
    """
    features = self.expected_output_features
    if features is not None:
        # Convert to a plain list for the configuration dictionary.
        features = features.tolist()

    self._save(
        path,
        {
            "encoder_sizes": self._encoder_sizes,
            "expected_output_features": features,
        },
    )

load(path)

¶

classmethod

Load the model from a directory.

PARAMETER	DESCRIPTION
`path`	Path to the directory. TYPE: `Path`

RETURNS	DESCRIPTION
`Hex2VecEmbedder`	The loaded embedder. TYPE: `Hex2VecEmbedder`

Source code in srai/embedders/hex2vec/embedder.py

@classmethod
def load(cls, path: Union[Path, str]) -> "Hex2VecEmbedder":
    """
    Restore an embedder previously stored under the given path.

    Args:
        path (Path): Path to the directory.

    Returns:
        Hex2VecEmbedder: The loaded embedder.
    """
    # The model class is passed to the loader defined on the base class.
    model_class = Hex2VecModel
    return cls._load(path, model_class)

Hex2VecModel(layer_sizes, learning_rate=0.001)

¶

Bases: Model

Hex2Vec embedding model.

This class implements the embedding model from Hex2Vec paper. It is based on a skip-gram model with negative sampling and triplet-loss. The model takes vectors of numbers as input (raw counts of features) per region and outputs dense embeddings.

PARAMETER	DESCRIPTION
`layer_sizes`	List of sizes for the model layers. The first element is the input size (number of features), the last element is the output (embedding) size. TYPE: `List[int]`
`learning_rate`	Learning rate. Defaults to 0.001. TYPE: `float` DEFAULT: `0.001`

RAISES	DESCRIPTION
`ValueError`	If layer_sizes contains less than 2 elements.

Source code in srai/embedders/hex2vec/model.py

def __init__(self, layer_sizes: list[int], learning_rate: float = 0.001):
    """
    Initialize Hex2VecModel.

    The encoder is a stack of linear layers with a ReLU between consecutive
    layers (no activation after the last one). Linear weights are initialised
    with Xavier-uniform initialisation.

    Args:
        layer_sizes (List[int]): List of sizes for the model layers.
            The first element is the input size (number of features),
            the last element is the output (embedding) size.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.


    Raises:
        ValueError: If layer_sizes contains less than 2 elements.
    """
    import_optional_dependencies(
        dependency_group="torch", modules=["torch", "pytorch_lightning"]
    )
    from torch import nn

    super().__init__()
    self.layer_sizes = layer_sizes
    self.learning_rate = learning_rate

    if len(layer_sizes) < 2:
        raise ValueError("layer_sizes must contain at least 2 elements")

    def create_layers(sizes: list[tuple[int, int]]) -> nn.Sequential:
        """Build the encoder from consecutive (input_size, output_size) pairs."""
        layers: list[nn.Module] = []
        last_index = len(sizes) - 1
        for i, (input_size, output_size) in enumerate(sizes):
            linear = nn.Linear(input_size, output_size)
            nn.init.xavier_uniform_(linear.weight)
            # Append the initialised layer itself. Appending a freshly created
            # nn.Linear here would silently drop the Xavier initialisation.
            layers.append(linear)
            if i != last_index:
                layers.append(nn.ReLU())
        return nn.Sequential(*layers)

    # Pair each layer size with the next one: [a, b, c] -> [(a, b), (b, c)].
    sizes = list(zip(layer_sizes[:-1], layer_sizes[1:]))
    self.encoder = create_layers(sizes)

get_config()

¶

Get model config.

Source code in srai/embedders/_base.py

def get_config(self) -> dict[str, Any]:
    """Collect the instance's public attributes, minus a few excluded names."""
    # Attribute names that are left out of the config even though they are public.
    excluded = (
        "training",
        "prepare_data_per_node",
        "allow_zero_length_dataloader_with_multiple_devices",
    )

    return {
        name: value
        for name, value in vars(self).items()
        if not (name[0] == "_" or name in excluded)
    }

save(path)

¶

Save the model to a directory.

PARAMETER	DESCRIPTION
`path`	Path to the directory. TYPE: `Path`

Source code in srai/embedders/_base.py

def save(self, path: Union[Path, str]) -> None:
    """
    Write the model's state dict to the given path.

    Args:
        path (Path): Destination passed to torch.save.
    """
    import torch

    # Only the state dict is stored, not the model object itself.
    state = self.state_dict()
    torch.save(state, path)

load(path, **kwargs)

¶

classmethod

Load model from a file.

PARAMETER	DESCRIPTION
`path`	Path to the file. TYPE: `Union[Path, str]`
`**kwargs`	Additional kwargs to pass to the model constructor. TYPE: `dict` DEFAULT: `{}`

Source code in srai/embedders/_base.py

@classmethod
def load(cls, path: Union[Path, str], **kwargs: Any) -> "Model":
    """
    Build a model from kwargs and restore its weights from a file.

    Args:
        path (Union[Path, str]): Path to the file holding a saved state dict.
        **kwargs (dict): Additional kwargs to pass to the model constructor.

    Returns:
        Model: The constructed model with the loaded state dict applied.
    """
    import torch

    file_path = Path(path) if isinstance(path, str) else path

    model = cls(**kwargs)
    # NOTE(review): torch.load unpickles the file - only load files from trusted sources.
    state = torch.load(file_path)
    model.load_state_dict(state)
    return model

forward(X_anchor)

¶

Calculate embedding for a region.

PARAMETER	DESCRIPTION
`X_anchor`	Region features. TYPE: `Tensor`

Source code in srai/embedders/hex2vec/model.py

def forward(self, X_anchor: "torch.Tensor") -> "torch.Tensor":
    """
    Embed regions by passing their features through the encoder.

    Args:
        X_anchor (torch.Tensor): Region features.

    Returns:
        torch.Tensor: Output of self.encoder for the given features.
    """
    embedding = self.encoder(X_anchor)
    return embedding

predict_proba(X_anchor, X_context)

¶

Predict the probability of X_anchor being neighbours with X_context.

X_anchor and X_context are assumed to have the same batch size. The probabilities are calculated in pairs, i.e. the first element of X_anchor is compared with the first element of X_context.

PARAMETER	DESCRIPTION
`X_anchor`	Anchor regions. TYPE: `Tensor`
`X_context`	Context regions. TYPE: `Tensor`

Source code in srai/embedders/hex2vec/model.py

def predict_proba(self, X_anchor: "torch.Tensor", X_context: "torch.Tensor") -> "torch.Tensor":
    """
    Estimate the pairwise probability that X_anchor and X_context are neighbours.

    Both inputs are assumed to share the same batch size. Rows are matched
    by position: the i-th anchor is compared with the i-th context region.
    The result is the sigmoid of the raw scores from predict_scores.

    Args:
        X_anchor (torch.Tensor): Anchor regions.
        X_context (torch.Tensor): Context regions.

    Returns:
        torch.Tensor: Sigmoid of the raw pairwise scores.
    """
    from torch.nn.functional import sigmoid

    raw_scores = self.predict_scores(X_anchor, X_context)
    probabilities = sigmoid(raw_scores)
    return probabilities

predict_scores(X_anchor, X_context)

¶

Predict raw unnormalized scores of X_anchor being neighbours with X_context.

X_anchor and X_context are assumed to have the same batch size. The scores are calculated in pairs, i.e. the first element of X_anchor is compared with the first element of X_context. In order to get probabilities, use the sigmoid function.

PARAMETER	DESCRIPTION
`X_anchor`	Anchor regions. TYPE: `Tensor`
`X_context`	Context regions. TYPE: `Tensor`

Source code in srai/embedders/hex2vec/model.py

def predict_scores(self, X_anchor: "torch.Tensor", X_context: "torch.Tensor") -> "torch.Tensor":
    """
    Compute raw, unnormalized neighbour scores for anchor/context pairs.

    Both inputs are assumed to share the same batch size. Rows are matched
    by position: the i-th anchor is scored against the i-th context region.
    Each score is the dot product of the two embeddings; apply the sigmoid
    function to turn scores into probabilities.

    Args:
        X_anchor (torch.Tensor): Anchor regions.
        X_context (torch.Tensor): Context regions.

    Returns:
        torch.Tensor: One score per anchor/context pair.
    """
    import torch

    anchor_embeddings = self(X_anchor)
    context_embeddings = self(X_context)

    # Element-wise product summed over dim 1 is a row-wise dot product.
    pairwise_product = anchor_embeddings * context_embeddings
    return torch.sum(pairwise_product, dim=1)

training_step(batch, batch_idx)

¶

Perform one training step.

One batch of data consists of 3 tensors

X_anchor: Anchor regions.
X_positive: Positive regions. The regions assumed to be neighbours of the corresponding regions in X_anchor.
X_negative: Negative regions. The regions assumed to NOT be neighbours of the corresponding regions in X_anchor.

The regions in X_anchor, X_positive and X_negative are first embedded using the encoder.
After that, the dot product of the corresponding embeddings is calculated.
The loss is calculated as a binary cross-entropy between the dot product and the labels.

PARAMETER	DESCRIPTION
`batch`	Batch of data. TYPE: `List[Tensor]`
`batch_idx`	Batch index. TYPE: `int`

Source code in srai/embedders/hex2vec/model.py

def training_step(self, batch: list["torch.Tensor"], batch_idx: int) -> "torch.Tensor":
    """
    Run a single training step and return its loss.

    A batch consists of 3 tensors:
        - X_anchor: Anchor regions.
        - X_positive: Positive regions. The regions assumed to be neighbours
            of the corresponding regions in X_anchor.
        - X_negative: Negative regions. The regions assumed to NOT be neighbours
            of the corresponding regions in X_anchor.

    Anchor/positive and anchor/negative pairs are scored with predict_scores
    (dot product of the embeddings). The loss is the binary cross-entropy
    between those scores, treated as logits, and labels of 1 for positive
    pairs and 0 for negative pairs. Loss and F1 are logged as "train_loss"
    and "train_f1".

    Args:
        batch (List[torch.Tensor]): Batch of data.
        batch_idx (int): Batch index.

    Returns:
        torch.Tensor: The binary cross-entropy loss for the batch.
    """
    import torch
    import torch.nn.functional as F
    from torchmetrics.functional import f1_score as f1

    anchors, positives, negatives = batch
    positive_logits = self.predict_scores(anchors, positives)
    negative_logits = self.predict_scores(anchors, negatives)

    logits = torch.cat([positive_logits, negative_logits])
    # Labels follow the same order as the logits: ones first, then zeros.
    # .to(anchors) aligns them with the anchor batch.
    targets = torch.cat(
        [torch.ones_like(positive_logits), torch.zeros_like(negative_logits)]
    ).to(anchors)

    loss = F.binary_cross_entropy_with_logits(logits, targets)
    f_score = f1(F.sigmoid(logits), targets.int(), task="binary")

    for metric_name, metric_value in (("train_loss", loss), ("train_f1", f_score)):
        self.log(metric_name, metric_value, on_step=True, on_epoch=True)
    return loss

validation_step(batch, batch_idx)

¶

Perform one validation step.

PARAMETER	DESCRIPTION
`batch`	Batch of data. TYPE: `List[Tensor]`
`batch_idx`	Batch index. TYPE: `int`

Source code in srai/embedders/hex2vec/model.py

def validation_step(self, batch: list["torch.Tensor"], batch_idx: int) -> "torch.Tensor":
    """
    Run a single validation step and return its loss.

    The batch is unpacked into anchor, positive and negative regions.
    Pairs are scored with predict_scores, and the binary cross-entropy
    between the scores (as logits) and the 1/0 labels is computed.
    Loss and F1 are logged as "val_loss" and "val_f1".

    Args:
        batch (List[torch.Tensor]): Batch of data.
        batch_idx (int): Batch index.

    Returns:
        torch.Tensor: The binary cross-entropy loss for the batch.
    """
    import torch
    import torch.nn.functional as F
    from torchmetrics.functional import f1_score as f1

    anchors, positives, negatives = batch
    positive_logits = self.predict_scores(anchors, positives)
    negative_logits = self.predict_scores(anchors, negatives)

    logits = torch.cat([positive_logits, negative_logits])
    # Labels follow the same order as the logits: ones first, then zeros.
    # .to(anchors) aligns them with the anchor batch.
    targets = torch.cat(
        [torch.ones_like(positive_logits), torch.zeros_like(negative_logits)]
    ).to(anchors)

    loss = F.binary_cross_entropy_with_logits(logits, targets)
    f_score = f1(F.sigmoid(logits), targets.int(), task="binary")

    for metric_name, metric_value in (("val_loss", loss), ("val_f1", f_score)):
        self.log(metric_name, metric_value, on_step=True, on_epoch=True)
    return loss

configure_optimizers()

¶

Configure optimizer.

Source code in srai/embedders/hex2vec/model.py

def configure_optimizers(self) -> "torch.optim.Optimizer":
    """Create an Adam optimizer over the model parameters using self.learning_rate."""
    from torch.optim import Adam

    trainable = self.parameters()
    return Adam(trainable, lr=self.learning_rate)

NeighbourDataset(
    data, neighbourhood, negative_sample_k_distance=2
)

¶

Bases: Dataset[NeighbourDatasetItem], Generic[T]

Dataset for training a model to predict neighbours.

It works by returning triplets of regions: anchor, positive and negative. A model can be trained to predict that the anchor region is a neighbour of the positive region, and that it is not a neighbour of the negative region.

PARAMETER	DESCRIPTION
`data`	Data to use for training. Raw counts of features in regions. TYPE: `DataFrame`
`neighbourhood`	Neighbourhood to use for training. It has to be initialized with the same data as the data argument. TYPE: `Neighbourhood[T]`
`negative_sample_k_distance`	How many neighbours away to sample negative regions. For example, if k=2, then the negative regions will be sampled from regions that are at least 3 hops away from the anchor region. Has to be >= 2. TYPE: `int` DEFAULT: `2`

RAISES	DESCRIPTION
`ValueError`	If negative_sample_k_distance < 2.

Source code in srai/embedders/hex2vec/neighbour_dataset.py

def __init__(
    self,
    data: pd.DataFrame,
    neighbourhood: Neighbourhood[T],
    negative_sample_k_distance: int = 2,
):
    """
    Set up the dataset and its lookup tables.

    Args:
        data (pd.DataFrame): Data to use for training. Raw counts of features in regions.
        neighbourhood (Neighbourhood[T]): Neighbourhood to use for training.
            It has to be initialized with the same data as the data argument.
        negative_sample_k_distance (int): How many neighbours away to sample negative regions.
            For example, if k=2, then the negative regions will be sampled from regions that are
            at least 3 hops away from the anchor region. Has to be >= 2.

    Raises:
        ValueError: If negative_sample_k_distance < 2.
    """
    import_optional_dependencies(dependency_group="torch", modules=["torch"])
    import torch

    # Rows of the tensor are addressed by positional location in `data`.
    self._data = torch.Tensor(data.to_numpy())
    self._assert_negative_sample_k_distance_correct(negative_sample_k_distance)
    self._negative_sample_k_distance = negative_sample_k_distance

    # Declared here without a value; presumably filled by _build_lookup_tables
    # (defined elsewhere - confirm).
    self._anchor_df_locs_lookup: np.ndarray
    self._positive_df_locs_lookup: np.ndarray
    self._excluded_from_negatives: dict[int, set[int]] = {}

    # Two-way mapping between region index values and row positions in `data`.
    index_to_loc: dict[T, int] = {}
    for position, region_index in enumerate(data.index):
        index_to_loc[region_index] = position
    self._region_index_to_df_loc = index_to_loc
    self._df_loc_to_region_index: dict[int, T] = {
        position: region_index for region_index, position in index_to_loc.items()
    }

    self._build_lookup_tables(data, neighbourhood)

__len__()

¶

Return the number of anchor-positive pairs available in the dataset.

RETURNS	DESCRIPTION
`int`	The number of pairs. TYPE: `int`

Source code in srai/embedders/hex2vec/neighbour_dataset.py

def __len__(self) -> int:
    """
    Report how many anchor-positive pairs the dataset holds.

    Returns:
        int: Length of the anchor lookup table, i.e. the number of pairs.
    """
    anchor_lookup = self._anchor_df_locs_lookup
    return len(anchor_lookup)

__getitem__(data_row_index)

¶

Return a single dataset item (anchor, positive, negative).

PARAMETER	DESCRIPTION
`data_row_index`	The index of the dataset item to return. TYPE: `Any`

RETURNS	DESCRIPTION
`NeighbourDatasetItem`	The dataset item. This includes the anchor region, positive region and a randomly sampled negative region. TYPE: `NeighbourDatasetItem`

Source code in srai/embedders/hex2vec/neighbour_dataset.py

def __getitem__(self, data_row_index: Any) -> NeighbourDatasetItem:
    """
    Fetch one (anchor, positive, negative) triplet.

    Args:
        data_row_index (Any): The index of the dataset item to return.

    Returns:
        NeighbourDatasetItem: The dataset item.
            This includes the anchor region, positive region
            and a randomly sampled negative region.
    """
    anchor_loc = self._anchor_df_locs_lookup[data_row_index]
    positive_loc = self._positive_df_locs_lookup[data_row_index]
    # The negative is drawn anew on every access, based on the anchor.
    negative_loc = self._get_random_negative_df_loc(anchor_loc)

    rows = self._data
    return NeighbourDatasetItem(rows[anchor_loc], rows[positive_loc], rows[negative_loc])

NeighbourDatasetItem

¶

Bases: NamedTuple

Neighbour dataset item.

ATTRIBUTE	DESCRIPTION
`X_anchor`	Anchor regions. TYPE: `Tensor`
`X_positive`	Positive regions. Data for the regions that are neighbours of regions in X_anchor. TYPE: `Tensor`
`X_negative`	Negative regions. Data for the regions that are NOT neighbours of the regions in X_anchor. TYPE: `Tensor`

Index

`Hex2VecEmbedder( encoder_sizes=None, expected_output_features=None )`
¶

`transform(regions_gdf, features_gdf, joint_gdf)`
¶

`fit( regions_gdf, features_gdf, joint_gdf, neighbourhood, negative_sample_k_distance=2, batch_size=32, learning_rate=0.001, trainer_kwargs=None, )`
¶

`fit_transform( regions_gdf, features_gdf, joint_gdf, neighbourhood, negative_sample_k_distance=2, batch_size=32, learning_rate=0.001, trainer_kwargs=None, )`
¶

`save(path)`
¶

`load(path)`
¶

`Hex2VecModel(layer_sizes, learning_rate=0.001)`
¶

`get_config()`
¶

`save(path)`
¶

`load(path, **kwargs)`
¶

`forward(X_anchor)`
¶

`predict_proba(X_anchor, X_context)`
¶

`predict_scores(X_anchor, X_context)`
¶

`training_step(batch, batch_idx)`
¶

`validation_step(batch, batch_idx)`
¶

`configure_optimizers()`
¶

`NeighbourDataset( data, neighbourhood, negative_sample_k_distance=2 )`
¶

`__len__()`
¶

`__getitem__(data_row_index)`
¶

`NeighbourDatasetItem`
¶

Index

Hex2VecEmbedder( encoder_sizes=None, expected_output_features=None ) ¶

transform(regions_gdf, features_gdf, joint_gdf) ¶

fit( regions_gdf, features_gdf, joint_gdf, neighbourhood, negative_sample_k_distance=2, batch_size=32, learning_rate=0.001, trainer_kwargs=None, ) ¶

fit_transform( regions_gdf, features_gdf, joint_gdf, neighbourhood, negative_sample_k_distance=2, batch_size=32, learning_rate=0.001, trainer_kwargs=None, ) ¶

save(path) ¶

load(path) ¶

Hex2VecModel(layer_sizes, learning_rate=0.001) ¶

get_config() ¶

save(path) ¶

load(path, **kwargs) ¶

forward(X_anchor) ¶

predict_proba(X_anchor, X_context) ¶

predict_scores(X_anchor, X_context) ¶

training_step(batch, batch_idx) ¶

validation_step(batch, batch_idx) ¶

configure_optimizers() ¶

NeighbourDataset( data, neighbourhood, negative_sample_k_distance=2 ) ¶

__len__() ¶

__getitem__(data_row_index) ¶

NeighbourDatasetItem ¶

`Hex2VecEmbedder( encoder_sizes=None, expected_output_features=None )`
¶

`transform(regions_gdf, features_gdf, joint_gdf)`
¶

`fit( regions_gdf, features_gdf, joint_gdf, neighbourhood, negative_sample_k_distance=2, batch_size=32, learning_rate=0.001, trainer_kwargs=None, )`
¶

`fit_transform( regions_gdf, features_gdf, joint_gdf, neighbourhood, negative_sample_k_distance=2, batch_size=32, learning_rate=0.001, trainer_kwargs=None, )`
¶

`save(path)`
¶

`load(path)`
¶

`Hex2VecModel(layer_sizes, learning_rate=0.001)`
¶

`get_config()`
¶

`save(path)`
¶

`load(path, **kwargs)`
¶

`forward(X_anchor)`
¶

`predict_proba(X_anchor, X_context)`
¶

`predict_scores(X_anchor, X_context)`
¶

`training_step(batch, batch_idx)`
¶

`validation_step(batch, batch_idx)`
¶

`configure_optimizers()`
¶

`NeighbourDataset( data, neighbourhood, negative_sample_k_distance=2 )`
¶

`__len__()`
¶

`__getitem__(data_row_index)`
¶

`NeighbourDatasetItem`
¶