Model

Hex2Vec model.

This module contains the embedding model from the Hex2Vec paper [1].

References

[1] https://dl.acm.org/doi/10.1145/3486635.3491076

Hex2VecModel(layer_sizes, learning_rate=0.001)

Bases: Model

Hex2Vec embedding model.

This class implements the embedding model from the Hex2Vec paper. It is based on a skip-gram model with negative sampling and a triplet loss. The model takes a vector of raw feature counts per region as input and outputs a dense embedding.
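
A minimal construction sketch (the layer sizes are illustrative; the import path follows the source location shown below):

from srai.embedders.hex2vec.model import Hex2VecModel

# 15 input features -> hidden size 10 -> 3-dimensional embeddings
model = Hex2VecModel(layer_sizes=[15, 10, 3])

# Fewer than 2 sizes is rejected:
# Hex2VecModel(layer_sizes=[15])  # raises ValueError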

PARAMETER DESCRIPTION
layer_sizes

List of sizes for the model layers. The first element is the input size (number of features), the last element is the output (embedding) size.

TYPE: List[int]

learning_rate

Learning rate. Defaults to 0.001.

TYPE: float DEFAULT: 0.001

RAISES DESCRIPTION
ValueError

If layer_sizes contains fewer than 2 elements.

Source code in srai/embedders/hex2vec/model.py
def __init__(self, layer_sizes: list[int], learning_rate: float = 0.001):
    """
    Initialize Hex2VecModel.

    Args:
        layer_sizes (List[int]): List of sizes for the model layers.
            The first element is the input size (number of features),
            the last element is the output (embedding) size.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.

    Raises:
        ValueError: If layer_sizes contains fewer than 2 elements.
    """
    import_optional_dependencies(
        dependency_group="torch", modules=["torch", "pytorch_lightning"]
    )
    from torch import nn

    super().__init__()
    self.layer_sizes = layer_sizes
    self.learning_rate = learning_rate

    if len(layer_sizes) < 2:
        raise ValueError("layer_sizes must contain at least 2 elements")

    def create_layers(sizes: list[tuple[int, int]]) -> nn.Sequential:
        layers = []
        for i, (input_size, output_size) in enumerate(sizes):
            # Xavier-initialize each linear layer before adding it.
            linear = nn.Linear(input_size, output_size)
            nn.init.xavier_uniform_(linear.weight)
            layers.append(linear)
            # Every layer except the last is followed by a ReLU activation.
            if i != len(sizes) - 1:
                layers.append(nn.ReLU())
        return nn.Sequential(*layers)

    sizes = list(zip(layer_sizes[:-1], layer_sizes[1:]))
    self.encoder = create_layers(sizes)
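
For illustration, with layer_sizes=[15, 10, 3] (sizes assumed), sizes becomes [(15, 10), (10, 3)] and the encoder built above is structurally equivalent to:

from torch import nn

encoder = nn.Sequential(
    nn.Linear(15, 10),
    nn.ReLU(),
    nn.Linear(10, 3),  # no activation after the final (embedding) layer
)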

get_config()

Get model config.

Source code in srai/embedders/_base.py
def get_config(self) -> dict[str, Any]:
    """Get model config."""
    model_config = {
        k: v
        for k, v in vars(self).items()
        if k[0] != "_"
        and k
        not in (
            "training",
            "prepare_data_per_node",
            "allow_zero_length_dataloader_with_multiple_devices",
        )
    }

    return model_config
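
A sketch of the expected output, assuming the model from the construction example above; note that vars(self) may surface additional public attributes depending on the pytorch_lightning version, so the exact keys are not guaranteed:

config = model.get_config()
# Expected to contain the constructor arguments, e.g.:
# {"layer_sizes": [15, 10, 3], "learning_rate": 0.001}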

save(path)

Save the model to a file.

PARAMETER DESCRIPTION
path

Path to the file.

TYPE: Union[Path, str]

Source code in srai/embedders/_base.py
def save(self, path: Union[Path, str]) -> None:
    """
    Save the model to a file.

    Args:
        path (Union[Path, str]): Path to the file.
    """
    import torch

    torch.save(self.state_dict(), path)

load(path, **kwargs)

classmethod

Load model from a file.

PARAMETER DESCRIPTION
path

Path to the file.

TYPE: Union[Path, str]

**kwargs

Additional kwargs to pass to the model constructor.

TYPE: dict DEFAULT: {}

Source code in srai/embedders/_base.py
@classmethod
def load(cls, path: Union[Path, str], **kwargs: Any) -> "Model":
    """
    Load model from a file.

    Args:
        path (Union[Path, str]): Path to the file.
        **kwargs (dict): Additional kwargs to pass to the model constructor.
    """
    import torch

    if isinstance(path, str):
        path = Path(path)

    model = cls(**kwargs)
    model.load_state_dict(torch.load(path))
    return model
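
Because save stores only the state_dict, load has to rebuild the architecture from **kwargs, which must therefore match the saved weights. A minimal round trip (file name illustrative):

model = Hex2VecModel(layer_sizes=[15, 10, 3])
model.save("hex2vec_model.pt")  # illustrative file name

# kwargs are forwarded to the constructor, so they must match
# the architecture of the saved weights.
restored = Hex2VecModel.load("hex2vec_model.pt", layer_sizes=[15, 10, 3])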

forward(X_anchor)

Calculate embedding for a region.

PARAMETER DESCRIPTION
X_anchor

Region features.

TYPE: Tensor

Source code in srai/embedders/hex2vec/model.py
def forward(self, X_anchor: "torch.Tensor") -> "torch.Tensor":
    """
    Calculate embedding for a region.

    Args:
        X_anchor (torch.Tensor): Region features.
    """
    return self.encoder(X_anchor)
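
A shape-level sketch (batch size and feature count assumed):

import torch

model = Hex2VecModel(layer_sizes=[15, 10, 3])
X = torch.rand(32, 15)   # 32 regions, 15 raw feature counts each
embeddings = model(X)    # calls forward; shape: (32, 3)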

predict_proba(X_anchor, X_context)

Predict the probability of X_anchor being neighbours with X_context.

X_anchor and X_context are assumed to have the same batch size. The probabilities are calculated in pairs, i.e. the first element of X_anchor is compared with the first element of X_context.

PARAMETER DESCRIPTION
X_anchor

Anchor regions.

TYPE: Tensor

X_context

Context regions.

TYPE: Tensor

Source code in srai/embedders/hex2vec/model.py
def predict_proba(self, X_anchor: "torch.Tensor", X_context: "torch.Tensor") -> "torch.Tensor":
    """
    Predict the probability of X_anchor being neighbours with X_context.

    X_anchor and X_context are assumed to have the same batch size.
    The probabilities are calculated in pairs, i.e. the first element of X_anchor
    is compared with the first element of X_context.

    Args:
        X_anchor (torch.Tensor): Anchor regions.
        X_context (torch.Tensor): Context regions.
    """
    from torch.nn.functional import sigmoid

    score = self.predict_scores(X_anchor, X_context)
    return sigmoid(score)
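
A pairwise usage sketch (random inputs, shapes assumed): element i of X_anchor is compared only with element i of X_context, giving one probability per pair.

import torch

model = Hex2VecModel(layer_sizes=[15, 10, 3])
X_anchor = torch.rand(8, 15)
X_context = torch.rand(8, 15)

proba = model.predict_proba(X_anchor, X_context)  # shape: (8,), values in (0, 1)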

predict_scores(X_anchor, X_context)

Predict raw unnormalized scores of X_anchor being neighbours with X_context.

X_anchor and X_context are assumed to have the same batch size. The scores are calculated in pairs, i.e. the first element of X_anchor is compared with the first element of X_context. In order to get probabilities, use the sigmoid function.

PARAMETER DESCRIPTION
X_anchor

Anchor regions.

TYPE: Tensor

X_context

Context regions.

TYPE: Tensor

Source code in srai/embedders/hex2vec/model.py
def predict_scores(self, X_anchor: "torch.Tensor", X_context: "torch.Tensor") -> "torch.Tensor":
    """
    Predict raw unnormalized scores of X_anchor being neighbours with X_context.

    X_anchor and X_context are assumed to have the same batch size.
    The scores are calculated in pairs, i.e. the first element of X_anchor
    is compared with the first element of X_context.
    In order to get probabilities, use the sigmoid function.

    Args:
        X_anchor (torch.Tensor): Anchor regions.
        X_context (torch.Tensor): Context regions.
    """
    import torch

    X_anchor_em = self(X_anchor)
    X_context_em = self(X_context)
    score = torch.mul(X_anchor_em, X_context_em).sum(dim=1)
    return score
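
Continuing the sketch above: predict_proba is exactly the sigmoid of these scores, which the following checks.

import torch

scores = model.predict_scores(X_anchor, X_context)  # shape: (8,)
assert torch.allclose(torch.sigmoid(scores), model.predict_proba(X_anchor, X_context))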

training_step(batch, batch_idx)

Perform one training step.

One batch of data consists of 3 tensors:
  • X_anchor: Anchor regions.
  • X_positive: Positive regions. The regions assumed to be neighbours of the corresponding regions in X_anchor.
  • X_negative: Negative regions. The regions assumed to NOT be neighbours of the corresponding regions in X_anchor.
The regions in X_anchor, X_positive and X_negative are first embedded using the encoder.
After that, the dot product of the corresponding embeddings is calculated.
The loss is calculated as a binary cross-entropy between the dot product and the labels.
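
Equivalently, for a batch of B triplets with encoder f, the training step minimizes the mean binary cross-entropy over the positive and negative pairs:

$$
\mathcal{L} = -\frac{1}{2B} \sum_{i=1}^{B} \Big[ \log \sigma\big(\langle f(a_i), f(p_i) \rangle\big) + \log\Big(1 - \sigma\big(\langle f(a_i), f(n_i) \rangle\big)\Big) \Big]
$$
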
PARAMETER DESCRIPTION
batch

Batch of data.

TYPE: List[Tensor]

batch_idx

Batch index.

TYPE: int

Source code in srai/embedders/hex2vec/model.py
def training_step(self, batch: list["torch.Tensor"], batch_idx: int) -> "torch.Tensor":
    """
    Perform one training step.

    One batch of data consists of 3 tensors:
        - X_anchor: Anchor regions.
        - X_positive: Positive regions. The regions assumed to be neighbours
          of the corresponding regions in X_anchor.
        - X_negative: Negative regions. The regions assumed to NOT be neighbours
          of the corresponding regions in X_anchor.

    The regions in X_anchor, X_positive and X_negative are first embedded using
    the encoder. After that, the dot product of the corresponding embeddings is
    calculated. The loss is calculated as a binary cross-entropy between the dot
    product and the labels.

    Args:
        batch (List[torch.Tensor]): Batch of data.
        batch_idx (int): Batch index.
    """
    import torch
    import torch.nn.functional as F
    from torchmetrics.functional import f1_score as f1

    X_anchor, X_positive, X_negative = batch
    scores_pos = self.predict_scores(X_anchor, X_positive)
    scores_neg = self.predict_scores(X_anchor, X_negative)

    scores = torch.cat([scores_pos, scores_neg])
    y_pos = torch.ones_like(scores_pos)
    y_neg = torch.zeros_like(scores_neg)
    y = torch.cat([y_pos, y_neg]).to(X_anchor)

    loss = F.binary_cross_entropy_with_logits(scores, y)
    f_score = f1(F.sigmoid(scores), y.int(), task="binary")
    self.log("train_loss", loss, on_step=True, on_epoch=True)
    self.log("train_f1", f_score, on_step=True, on_epoch=True)
    return loss
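
A minimal end-to-end training sketch, assuming triplets have already been prepared as tensors (data, sizes, and epoch count are illustrative; in srai this wiring is normally handled by the Hex2VecEmbedder):

import pytorch_lightning as pl
import torch
from torch.utils.data import DataLoader, TensorDataset

# Illustrative random triplets: (anchor, positive, negative) feature vectors.
n, n_features = 256, 15
dataset = TensorDataset(
    torch.rand(n, n_features),  # X_anchor
    torch.rand(n, n_features),  # X_positive
    torch.rand(n, n_features),  # X_negative
)
loader = DataLoader(dataset, batch_size=32)

model = Hex2VecModel(layer_sizes=[n_features, 10, 3])
trainer = pl.Trainer(max_epochs=1)
trainer.fit(model, loader)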

validation_step(batch, batch_idx)

Perform one validation step.

PARAMETER DESCRIPTION
batch

Batch of data.

TYPE: List[Tensor]

batch_idx

Batch index.

TYPE: int

Source code in srai/embedders/hex2vec/model.py
def validation_step(self, batch: list["torch.Tensor"], batch_idx: int) -> "torch.Tensor":
    """
    Perform one validation step.

    Args:
        batch (List[torch.Tensor]): Batch of data.
        batch_idx (int): Batch index.
    """
    import torch
    import torch.nn.functional as F
    from torchmetrics.functional import f1_score as f1

    X_anchor, X_positive, X_negative = batch
    scores_pos = self.predict_scores(X_anchor, X_positive)
    scores_neg = self.predict_scores(X_anchor, X_negative)

    scores = torch.cat([scores_pos, scores_neg])
    y_pos = torch.ones_like(scores_pos)
    y_neg = torch.zeros_like(scores_neg)
    y = torch.cat([y_pos, y_neg]).to(X_anchor)

    loss = F.binary_cross_entropy_with_logits(scores, y)
    f_score = f1(F.sigmoid(scores), y.int(), task="binary")
    self.log("val_loss", loss, on_step=True, on_epoch=True)
    self.log("val_f1", f_score, on_step=True, on_epoch=True)
    return loss

configure_optimizers()

Configure optimizer.

Source code in srai/embedders/hex2vec/model.py
def configure_optimizers(self) -> "torch.optim.Optimizer":
    """Configure optimizer."""
    import torch

    return torch.optim.Adam(self.parameters(), lr=self.learning_rate)