Skip to content

Index

Hex2Vec.

Hex2VecEmbedder(
    encoder_sizes=None, expected_output_features=None
)

Bases: CountEmbedder

Hex2Vec Embedder.

PARAMETER DESCRIPTION
encoder_sizes

Sizes of the encoder layers. The input layer size shouldn't be included - it's inferred from the data. The last element is the embedding size. Defaults to [150, 75, 50].

TYPE: List[int] DEFAULT: None

Source code in srai/embedders/hex2vec/embedder.py
def __init__(
    self,
    encoder_sizes: Optional[list[int]] = None,
    expected_output_features: Optional[
        Union[list[str], OsmTagsFilter, GroupedOsmTagsFilter]
    ] = None,
) -> None:
    """
    Set up an unfitted Hex2Vec embedder.

    Args:
        encoder_sizes (List[int], optional): Widths of the consecutive encoder layers.
            The input layer must not be listed - its size is inferred from the data.
            The final value is the embedding size. When None,
            Hex2VecEmbedder.DEFAULT_ENCODER_SIZES is used (documented as [150, 75, 50]).
        expected_output_features
            (Union[List[str], OsmTagsFilter, GroupedOsmTagsFilter], optional):
            List of expected output features. Defaults to None.
    """
    # The parent embedder is always configured with count_subcategories=True.
    super().__init__(
        count_subcategories=True,
        expected_output_features=expected_output_features,
    )
    # torch and pytorch_lightning belong to the optional "torch" dependency group.
    import_optional_dependencies(
        modules=["torch", "pytorch_lightning"], dependency_group="torch"
    )

    layer_widths = (
        Hex2VecEmbedder.DEFAULT_ENCODER_SIZES if encoder_sizes is None else encoder_sizes
    )
    self._assert_encoder_sizes_correct(layer_widths)

    self._encoder_sizes = layer_widths
    # The model stays unset until fit() creates it.
    self._model: Optional[Hex2VecModel] = None
    self._is_fitted = False

transform(regions_gdf, features_gdf, joint_gdf)

Create region embeddings.

PARAMETER DESCRIPTION
regions_gdf

Region indexes and geometries.

TYPE: GeoDataFrame

features_gdf

Feature indexes, geometries and feature values.

TYPE: GeoDataFrame

joint_gdf

Joiner result with region-feature multi-index.

TYPE: GeoDataFrame

RETURNS DESCRIPTION
DataFrame

pd.DataFrame: Embedding and geometry index for each region in regions_gdf.

RAISES DESCRIPTION
ValueError

If features_gdf is empty and self.expected_output_features is not set.

ValueError

If any of the gdfs index names is None.

ValueError

If joint_gdf.index is not of type pd.MultiIndex or doesn't have 2 levels.

ValueError

If index levels in gdfs don't overlap correctly.

Source code in srai/embedders/hex2vec/embedder.py
def transform(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    joint_gdf: gpd.GeoDataFrame,
) -> pd.DataFrame:
    """
    Embed regions using the already fitted model.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        joint_gdf (gpd.GeoDataFrame): Joiner result with region-feature multi-index.

    Returns:
        pd.DataFrame: One embedding row per region, indexed like the raw counts
            computed for regions_gdf.

    Raises:
        ValueError: If features_gdf is empty and self.expected_output_features is not set.
        ValueError: If any of the gdfs index names is None.
        ValueError: If joint_gdf.index is not of type pd.MultiIndex or doesn't have 2 levels.
        ValueError: If index levels in gdfs don't overlap correctly.
    """
    import torch

    self._check_is_fitted()

    region_counts = self._get_raw_counts(regions_gdf, features_gdf, joint_gdf)
    model_input = torch.from_numpy(region_counts.values)
    model_output = self._model(model_input)  # type: ignore
    # Detach from the autograd graph before converting to a numpy array.
    embedding_matrix = model_output.detach().numpy()
    return pd.DataFrame(embedding_matrix, index=region_counts.index)

fit(
    regions_gdf,
    features_gdf,
    joint_gdf,
    neighbourhood,
    negative_sample_k_distance=2,
    batch_size=32,
    learning_rate=0.001,
    trainer_kwargs=None,
)

Fit the model to the data.

PARAMETER DESCRIPTION
regions_gdf

Region indexes and geometries.

TYPE: GeoDataFrame

features_gdf

Feature indexes, geometries and feature values.

TYPE: GeoDataFrame

joint_gdf

Joiner result with region-feature multi-index.

TYPE: GeoDataFrame

neighbourhood

The neighbourhood to use. Should be initialized with the same regions.

TYPE: Neighbourhood[T]

negative_sample_k_distance

When sampling negative samples, sample from a distance > k. Defaults to 2.

TYPE: int DEFAULT: 2

batch_size

Batch size. Defaults to 32.

TYPE: int DEFAULT: 32

learning_rate

Learning rate. Defaults to 0.001.

TYPE: float DEFAULT: 0.001

trainer_kwargs

Trainer kwargs. Defaults to None.

TYPE: Optional[Dict[str, Any]] DEFAULT: None

RAISES DESCRIPTION
ValueError

If features_gdf is empty and self.expected_output_features is not set.

ValueError

If any of the gdfs index names is None.

ValueError

If joint_gdf.index is not of type pd.MultiIndex or doesn't have 2 levels.

ValueError

If index levels in gdfs don't overlap correctly.

ValueError

If negative_sample_k_distance < 2.

Source code in srai/embedders/hex2vec/embedder.py
def fit(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    joint_gdf: gpd.GeoDataFrame,
    neighbourhood: Neighbourhood[T],
    negative_sample_k_distance: int = 2,
    batch_size: int = 32,
    learning_rate: float = 0.001,
    trainer_kwargs: Optional[dict[str, Any]] = None,
) -> None:
    """
    Train the Hex2Vec model on the given regions and features.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        joint_gdf (gpd.GeoDataFrame): Joiner result with region-feature multi-index.
        neighbourhood (Neighbourhood[T]): The neighbourhood to use.
            Should be initialized with the same regions.
        negative_sample_k_distance (int, optional): When sampling negative samples,
            sample from a distance > k. Defaults to 2.
        batch_size (int, optional): Batch size. Defaults to 32.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.
        trainer_kwargs (Optional[Dict[str, Any]], optional): Keyword arguments for
            pytorch_lightning.Trainer. Defaults to None.

    Raises:
        ValueError: If features_gdf is empty and self.expected_output_features is not set.
        ValueError: If any of the gdfs index names is None.
        ValueError: If joint_gdf.index is not of type pd.MultiIndex or doesn't have 2 levels.
        ValueError: If index levels in gdfs don't overlap correctly.
        ValueError: If negative_sample_k_distance < 2.
    """
    import pytorch_lightning as pl
    from torch.utils.data import DataLoader

    trainer_kwargs = self._prepare_trainer_kwargs(trainer_kwargs)

    region_counts = self._get_raw_counts(regions_gdf, features_gdf, joint_gdf)

    # Without an explicit feature list, adopt the columns seen in the training data.
    if self.expected_output_features is None:  # type: ignore[has-type]
        self.expected_output_features = pd.Series(region_counts.columns)

    # The input layer width equals the number of expected features.
    input_width = len(self.expected_output_features)  # type: ignore[arg-type]
    layer_sizes = [input_width]
    layer_sizes.extend(self._encoder_sizes)
    self._model = Hex2VecModel(layer_sizes=layer_sizes, learning_rate=learning_rate)

    triplets = NeighbourDataset(region_counts, neighbourhood, negative_sample_k_distance)
    loader = DataLoader(triplets, shuffle=True, batch_size=batch_size)

    pl.Trainer(**trainer_kwargs).fit(self._model, loader)
    self._is_fitted = True

fit_transform(
    regions_gdf,
    features_gdf,
    joint_gdf,
    neighbourhood,
    negative_sample_k_distance=2,
    batch_size=32,
    learning_rate=0.001,
    trainer_kwargs=None,
)

Fit the model to the data and return the embeddings.

PARAMETER DESCRIPTION
regions_gdf

Region indexes and geometries.

TYPE: GeoDataFrame

features_gdf

Feature indexes, geometries and feature values.

TYPE: GeoDataFrame

joint_gdf

Joiner result with region-feature multi-index.

TYPE: GeoDataFrame

neighbourhood

The neighbourhood to use. Should be initialized with the same regions.

TYPE: Neighbourhood[T]

negative_sample_k_distance

When sampling negative samples, sample from a distance > k. Defaults to 2.

TYPE: int DEFAULT: 2

batch_size

Batch size. Defaults to 32.

TYPE: int DEFAULT: 32

learning_rate

Learning rate. Defaults to 0.001.

TYPE: float DEFAULT: 0.001

trainer_kwargs

Trainer kwargs. Defaults to None.

TYPE: Optional[Dict[str, Any]] DEFAULT: None

RETURNS DESCRIPTION
DataFrame

pd.DataFrame: Region embeddings.

RAISES DESCRIPTION
ValueError

If features_gdf is empty and self.expected_output_features is not set.

ValueError

If any of the gdfs index names is None.

ValueError

If joint_gdf.index is not of type pd.MultiIndex or doesn't have 2 levels.

ValueError

If index levels in gdfs don't overlap correctly.

ValueError

If negative_sample_k_distance < 2.

Source code in srai/embedders/hex2vec/embedder.py
def fit_transform(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    joint_gdf: gpd.GeoDataFrame,
    neighbourhood: Neighbourhood[T],
    negative_sample_k_distance: int = 2,
    batch_size: int = 32,
    learning_rate: float = 0.001,
    trainer_kwargs: Optional[dict[str, Any]] = None,
) -> pd.DataFrame:
    """
    Train the model and immediately embed the same regions.

    Calls fit() with every argument and then transform() on the same three
    GeoDataFrames.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        joint_gdf (gpd.GeoDataFrame): Joiner result with region-feature multi-index.
        neighbourhood (Neighbourhood[T]): The neighbourhood to use.
            Should be initialized with the same regions.
        negative_sample_k_distance (int, optional): When sampling negative samples,
            sample from a distance > k. Defaults to 2.
        batch_size (int, optional): Batch size. Defaults to 32.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.
        trainer_kwargs (Optional[Dict[str, Any]], optional): Trainer kwargs. Defaults to None.

    Returns:
        pd.DataFrame: Region embeddings.

    Raises:
        ValueError: If features_gdf is empty and self.expected_output_features is not set.
        ValueError: If any of the gdfs index names is None.
        ValueError: If joint_gdf.index is not of type pd.MultiIndex or doesn't have 2 levels.
        ValueError: If index levels in gdfs don't overlap correctly.
        ValueError: If negative_sample_k_distance < 2.
    """
    # The same three frames feed both the training and the embedding step.
    gdfs = (regions_gdf, features_gdf, joint_gdf)
    self.fit(
        *gdfs,
        neighbourhood,
        negative_sample_k_distance,
        batch_size,
        learning_rate,
        trainer_kwargs,
    )
    return self.transform(*gdfs)

save(path)

Save the model to a directory.

PARAMETER DESCRIPTION
path

Path to the directory.

TYPE: Union[Path, str]

Source code in srai/embedders/hex2vec/embedder.py
def save(self, path: Union[Path, str]) -> None:
    """
    Save the embedder, together with its configuration, to a directory.

    The configuration holds the encoder sizes and the expected output features
    (converted to a plain list, or None when unset); it is passed to self._save.

    Args:
        path (Union[Path, str]): Path to the directory.
    """
    features = self.expected_output_features
    if features is None:
        serialised_features = None
    else:
        serialised_features = features.tolist()

    embedder_config = {
        "encoder_sizes": self._encoder_sizes,
        "expected_output_features": serialised_features,
    }
    self._save(path, embedder_config)

load(path)

classmethod

Load the model from a directory.

PARAMETER DESCRIPTION
path

Path to the directory.

TYPE: Union[Path, str]

RETURNS DESCRIPTION
Hex2VecEmbedder

The loaded embedder.

TYPE: Hex2VecEmbedder

Source code in srai/embedders/hex2vec/embedder.py
@classmethod
def load(cls, path: Union[Path, str]) -> "Hex2VecEmbedder":
    """
    Restore an embedder previously saved to a directory.

    Args:
        path (Union[Path, str]): Path to the directory.

    Returns:
        Hex2VecEmbedder: The loaded embedder.
    """
    # Hex2VecModel is the model class handed to the shared _load helper.
    embedder = cls._load(path, Hex2VecModel)
    return embedder

Hex2VecModel(layer_sizes, learning_rate=0.001)

Bases: Model

Hex2Vec embedding model.

This class implements the embedding model from Hex2Vec paper. It is based on a skip-gram model with negative sampling and triplet-loss. The model takes vectors of numbers as input (raw counts of features) per region and outputs dense embeddings.

PARAMETER DESCRIPTION
layer_sizes

List of sizes for the model layers. The first element is the input size (number of features), the last element is the output (embedding) size.

TYPE: List[int]

learning_rate

Learning rate. Defaults to 0.001.

TYPE: float DEFAULT: 0.001

RAISES DESCRIPTION
ValueError

If layer_sizes contains less than 2 elements.

Source code in srai/embedders/hex2vec/model.py
def __init__(self, layer_sizes: list[int], learning_rate: float = 0.001):
    """
    Initialize Hex2VecModel.

    Builds the encoder as a stack of linear layers with a ReLU between
    consecutive layers (no activation after the last one). The weights of
    every linear layer are Xavier-uniform initialised.

    Args:
        layer_sizes (List[int]): List of sizes for the model layers.
            The first element is the input size (number of features),
            the last element is the output (embedding) size.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.

    Raises:
        ValueError: If layer_sizes contains less than 2 elements.
    """
    import_optional_dependencies(
        dependency_group="torch", modules=["torch", "pytorch_lightning"]
    )
    from torch import nn

    super().__init__()
    self.layer_sizes = layer_sizes
    self.learning_rate = learning_rate

    if len(layer_sizes) < 2:
        raise ValueError("layer_sizes must contain at least 2 elements")

    def create_layers(sizes: list[tuple[int, int]]) -> nn.Sequential:
        layers: list[nn.Module] = []
        last_index = len(sizes) - 1
        for i, (input_size, output_size) in enumerate(sizes):
            linear = nn.Linear(input_size, output_size)
            nn.init.xavier_uniform_(linear.weight)
            # Append the layer that was just initialised. Appending a second,
            # freshly constructed nn.Linear would discard the Xavier weights.
            layers.append(linear)
            if i != last_index:
                layers.append(nn.ReLU())
        return nn.Sequential(*layers)

    # Pair each layer size with the next one: (in_features, out_features).
    sizes = list(zip(layer_sizes[:-1], layer_sizes[1:]))
    self.encoder = create_layers(sizes)

get_config()

Get model config.

Source code in srai/embedders/_base.py
def get_config(self) -> dict[str, Any]:
    """
    Get model config.

    Returns:
        dict[str, Any]: The instance attributes from vars(self) whose names do not
            start with an underscore, minus a fixed set of excluded names
            (presumably bookkeeping attributes set by the torch/Lightning base
            classes - confirm against the base class).
    """
    excluded_names = (
        "training",
        "prepare_data_per_node",
        "allow_zero_length_dataloader_with_multiple_devices",
    )

    model_config: dict[str, Any] = {}
    for name, value in vars(self).items():
        # Skip private attributes and the explicitly excluded ones.
        if name[0] == "_" or name in excluded_names:
            continue
        model_config[name] = value

    return model_config

save(path)

Save the model's state dict to a file.

PARAMETER DESCRIPTION
path

Path to the file.

TYPE: Union[Path, str]

Source code in srai/embedders/_base.py
def save(self, path: Union[Path, str]) -> None:
    """
    Save the model's state dict to a file.

    Only the state dict (self.state_dict()) is written, via torch.save.

    Args:
        path (Union[Path, str]): Destination passed to torch.save.
    """
    import torch

    state = self.state_dict()
    torch.save(state, path)

load(path, **kwargs)

classmethod

Load model from a file.

PARAMETER DESCRIPTION
path

Path to the file.

TYPE: Union[Path, str]

**kwargs

Additional kwargs to pass to the model constructor.

TYPE: dict DEFAULT: {}

Source code in srai/embedders/_base.py
@classmethod
def load(cls, path: Union[Path, str], **kwargs: Any) -> "Model":
    """
    Build a model and populate it with a state dict stored in a file.

    Args:
        path (Union[Path, str]): Path to the file.
        **kwargs (dict): Additional kwargs to pass to the model constructor.

    Returns:
        Model: A new cls(**kwargs) instance with the stored state dict loaded.
    """
    import torch

    checkpoint_path = Path(path) if isinstance(path, str) else path

    model = cls(**kwargs)
    # NOTE: torch.load deserialises with pickle - only load files from trusted sources.
    stored_state = torch.load(checkpoint_path)
    model.load_state_dict(stored_state)
    return model

forward(X_anchor)

Calculate embedding for a region.

PARAMETER DESCRIPTION
X_anchor

Region features.

TYPE: Tensor

Source code in srai/embedders/hex2vec/model.py
def forward(self, X_anchor: "torch.Tensor") -> "torch.Tensor":
    """
    Run region features through the encoder.

    Args:
        X_anchor (torch.Tensor): Region features.

    Returns:
        torch.Tensor: Output of self.encoder for X_anchor, i.e. the embedding.
    """
    embedding = self.encoder(X_anchor)
    return embedding

predict_proba(X_anchor, X_context)

Predict the probability of X_anchor being neighbours with X_context.

X_anchor and X_context are assumed to have the same batch size. The probabilities are calculated in pairs, i.e. the first element of X_anchor is compared with the first element of X_context.

PARAMETER DESCRIPTION
X_anchor

Anchor regions.

TYPE: Tensor

X_context

Context regions.

TYPE: Tensor

Source code in srai/embedders/hex2vec/model.py
def predict_proba(self, X_anchor: "torch.Tensor", X_context: "torch.Tensor") -> "torch.Tensor":
    """
    Estimate the probability that paired anchor and context regions are neighbours.

    Both inputs are assumed to share the same batch size. Rows are compared
    pairwise: the first row of X_anchor with the first row of X_context, and so on.
    The result is the sigmoid of the raw scores from predict_scores.

    Args:
        X_anchor (torch.Tensor): Anchor regions.
        X_context (torch.Tensor): Context regions.

    Returns:
        torch.Tensor: Sigmoid of the pairwise scores.
    """
    import torch

    raw_scores = self.predict_scores(X_anchor, X_context)
    probabilities = torch.sigmoid(raw_scores)
    return probabilities

predict_scores(X_anchor, X_context)

Predict raw unnormalized scores of X_anchor being neighbours with X_context.

X_anchor and X_context are assumed to have the same batch size. The scores are calculated in pairs, i.e. the first element of X_anchor is compared with the first element of X_context. In order to get probabilities, use the sigmoid function.

PARAMETER DESCRIPTION
X_anchor

Anchor regions.

TYPE: Tensor

X_context

Context regions.

TYPE: Tensor

Source code in srai/embedders/hex2vec/model.py
def predict_scores(self, X_anchor: "torch.Tensor", X_context: "torch.Tensor") -> "torch.Tensor":
    """
    Compute raw (unnormalized) neighbour scores for paired regions.

    Both inputs are embedded by calling the model itself; the score of a pair is
    the element-wise product of the two embeddings summed over dim 1, i.e. a
    dot product per row. X_anchor and X_context are assumed to have the same
    batch size and are compared row by row. Apply the sigmoid function to turn
    scores into probabilities.

    Args:
        X_anchor (torch.Tensor): Anchor regions.
        X_context (torch.Tensor): Context regions.

    Returns:
        torch.Tensor: One score per row pair.
    """
    import torch

    anchor_embedding = self(X_anchor)
    context_embedding = self(X_context)
    return torch.sum(anchor_embedding * context_embedding, dim=1)

training_step(batch, batch_idx)

Perform one training step.

One batch of data consists of 3 tensors
  • X_anchor: Anchor regions.
  • X_positive: Positive regions. The regions assumed to be neighbours of the corresponding regions in X_anchor.
  • X_negative: Negative regions. The regions assumed to NOT be neighbours of the corresponding regions in X_anchor.
The regions in X_anchor, X_positive and X_negative are first embedded using the encoder.
After that, the dot product of the corresponding embeddings is calculated.
The loss is calculated as a binary cross-entropy between the dot product and the labels.
PARAMETER DESCRIPTION
batch

Batch of data.

TYPE: List[Tensor]

batch_idx

Batch index.

TYPE: int

Source code in srai/embedders/hex2vec/model.py
def training_step(self, batch: list["torch.Tensor"], batch_idx: int) -> "torch.Tensor":
    """
    Perform one training step.

    One batch of data consists of 3 tensors:
        - X_anchor: Anchor regions.
        - X_positive: Positive regions. The regions assumed to be neighbours
            of the corresponding regions in X_anchor.
        - X_negative: Negative regions. The regions assumed to NOT be neighbours
            of the corresponding regions in X_anchor.

    Anchor/positive and anchor/negative pairs are scored with predict_scores.
    The loss is the binary cross-entropy (with logits) between those scores and
    the labels: 1 for positive pairs, 0 for negative pairs. The loss and the
    binary F1 score are logged as "train_loss" and "train_f1".

    Args:
        batch (List[torch.Tensor]): Batch of data.
        batch_idx (int): Batch index.

    Returns:
        torch.Tensor: The loss for this batch.
    """
    import torch
    import torch.nn.functional as F
    from torchmetrics.functional import f1_score as f1

    X_anchor, X_positive, X_negative = batch
    positive_logits = self.predict_scores(X_anchor, X_positive)
    negative_logits = self.predict_scores(X_anchor, X_negative)
    logits = torch.cat([positive_logits, negative_logits])

    # Labels follow the concatenation order: ones for positives, zeros for negatives.
    # .to(X_anchor) matches the labels' dtype and device to the input tensor.
    targets = torch.cat(
        [torch.ones_like(positive_logits), torch.zeros_like(negative_logits)]
    ).to(X_anchor)

    loss = F.binary_cross_entropy_with_logits(logits, targets)
    f_score = f1(F.sigmoid(logits), targets.int(), task="binary")

    for metric_name, metric_value in (("train_loss", loss), ("train_f1", f_score)):
        self.log(metric_name, metric_value, on_step=True, on_epoch=True)
    return loss

validation_step(batch, batch_idx)

Perform one validation step.

PARAMETER DESCRIPTION
batch

Batch of data.

TYPE: List[Tensor]

batch_idx

Batch index.

TYPE: int

Source code in srai/embedders/hex2vec/model.py
def validation_step(self, batch: list["torch.Tensor"], batch_idx: int) -> "torch.Tensor":
    """
    Perform one validation step.

    The batch is unpacked into anchor, positive and negative regions. Both pair
    types are scored with predict_scores, and the binary cross-entropy (with
    logits) against labels 1 (positive) / 0 (negative) is computed. The loss and
    the binary F1 score are logged as "val_loss" and "val_f1".

    Args:
        batch (List[torch.Tensor]): Batch of data.
        batch_idx (int): Batch index.

    Returns:
        torch.Tensor: The loss for this batch.
    """
    import torch
    import torch.nn.functional as F
    from torchmetrics.functional import f1_score as f1

    X_anchor, X_positive, X_negative = batch
    positive_logits = self.predict_scores(X_anchor, X_positive)
    negative_logits = self.predict_scores(X_anchor, X_negative)
    logits = torch.cat([positive_logits, negative_logits])

    # Ones for the positive pairs, zeros for the negative pairs, in the same
    # order as the logits; .to(X_anchor) matches dtype and device to the input.
    positive_targets = torch.ones_like(positive_logits)
    negative_targets = torch.zeros_like(negative_logits)
    targets = torch.cat([positive_targets, negative_targets]).to(X_anchor)

    loss = F.binary_cross_entropy_with_logits(logits, targets)
    f_score = f1(F.sigmoid(logits), targets.int(), task="binary")

    log_options = {"on_step": True, "on_epoch": True}
    self.log("val_loss", loss, **log_options)
    self.log("val_f1", f_score, **log_options)
    return loss

configure_optimizers()

Configure optimizer.

Source code in srai/embedders/hex2vec/model.py
def configure_optimizers(self) -> "torch.optim.Optimizer":
    """
    Configure optimizer.

    Returns:
        torch.optim.Optimizer: An Adam optimizer over self.parameters(), using
            self.learning_rate as the learning rate.
    """
    from torch.optim import Adam

    optimizer = Adam(self.parameters(), lr=self.learning_rate)
    return optimizer

NeighbourDataset(
    data, neighbourhood, negative_sample_k_distance=2
)

Bases: Dataset[NeighbourDatasetItem], Generic[T]

Dataset for training a model to predict neighbours.

It works by returning triplets of regions: anchor, positive and negative. A model can be trained to predict that the anchor region is a neighbour of the positive region, and that it is not a neighbour of the negative region.

PARAMETER DESCRIPTION
data

Data to use for training. Raw counts of features in regions.

TYPE: DataFrame

neighbourhood

Neighbourhood to use for training. It has to be initialized with the same data as the data argument.

TYPE: Neighbourhood[T]

negative_sample_k_distance

How many neighbours away to sample negative regions. For example, if k=2, then the negative regions will be sampled from regions that are at least 3 hops away from the anchor region. Has to be >= 2.

TYPE: int DEFAULT: 2

RAISES DESCRIPTION
ValueError

If negative_sample_k_distance < 2.

Source code in srai/embedders/hex2vec/neighbour_dataset.py
def __init__(
    self,
    data: pd.DataFrame,
    neighbourhood: Neighbourhood[T],
    negative_sample_k_distance: int = 2,
):
    """
    Initialize NeighbourDataset.

    Args:
        data (pd.DataFrame): Data to use for training. Raw counts of features in regions.
        neighbourhood (Neighbourhood[T]): Neighbourhood to use for training.
            It has to be initialized with the same data as the data argument.
        negative_sample_k_distance (int): How many neighbours away to sample negative regions.
            For example, if k=2, then the negative regions will be sampled from regions that are
            at least 3 hops away from the anchor region. Has to be >= 2.

    Raises:
        ValueError: If negative_sample_k_distance < 2.
    """
    import_optional_dependencies(dependency_group="torch", modules=["torch"])
    import torch

    # Keep the feature rows as a single tensor; items are later picked by row position.
    self._data = torch.Tensor(data.to_numpy())
    self._assert_negative_sample_k_distance_correct(negative_sample_k_distance)
    self._negative_sample_k_distance = negative_sample_k_distance

    # Declared without a value here - presumably populated by _build_lookup_tables.
    self._anchor_df_locs_lookup: np.ndarray
    self._positive_df_locs_lookup: np.ndarray
    self._excluded_from_negatives: dict[int, set[int]] = {}

    # Two-way mapping between region index values and row positions in data.
    loc_by_region: dict[T, int] = {}
    for position, region_index in enumerate(data.index):
        loc_by_region[region_index] = position
    self._region_index_to_df_loc: dict[T, int] = loc_by_region
    self._df_loc_to_region_index: dict[int, T] = {
        position: region_index for region_index, position in loc_by_region.items()
    }

    self._build_lookup_tables(data, neighbourhood)

__len__()

Return the number of anchor-positive pairs available in the dataset.

RETURNS DESCRIPTION
int

The number of pairs.

TYPE: int

Source code in srai/embedders/hex2vec/neighbour_dataset.py
def __len__(self) -> int:
    """
    Return the number of anchor-positive pairs available in the dataset.

    Returns:
        int: The number of pairs, i.e. the length of the anchor lookup table.
    """
    pair_count = len(self._anchor_df_locs_lookup)
    return pair_count

__getitem__(data_row_index)

Return a single dataset item (anchor, positive, negative).

PARAMETER DESCRIPTION
data_row_index

The index of the dataset item to return.

TYPE: Any

RETURNS DESCRIPTION
NeighbourDatasetItem

The dataset item. This includes the anchor region, positive region and a randomly sampled negative region.

TYPE: NeighbourDatasetItem

Source code in srai/embedders/hex2vec/neighbour_dataset.py
def __getitem__(self, data_row_index: Any) -> NeighbourDatasetItem:
    """
    Return a single dataset item (anchor, positive, negative).

    The anchor and positive row positions come from the lookup tables; the
    negative row position is drawn by _get_random_negative_df_loc for the anchor.

    Args:
        data_row_index (Any): The index of the dataset item to return.

    Returns:
        NeighbourDatasetItem: The dataset item.
            This includes the anchor region, positive region
            and a randomly sampled negative region.
    """
    anchor_loc = self._anchor_df_locs_lookup[data_row_index]
    positive_loc = self._positive_df_locs_lookup[data_row_index]
    negative_loc = self._get_random_negative_df_loc(anchor_loc)

    return NeighbourDatasetItem(
        self._data[anchor_loc],
        self._data[positive_loc],
        self._data[negative_loc],
    )

NeighbourDatasetItem

Bases: NamedTuple

Neighbour dataset item.

ATTRIBUTE DESCRIPTION
X_anchor

Anchor regions.

TYPE: Tensor

X_positive

Positive regions. Data for the regions that are neighbours of regions in X_anchor.

TYPE: Tensor

X_negative

Negative regions. Data for the regions that are NOT neighbours of the regions in X_anchor.

TYPE: Tensor