Skip to content

S2VecEmbedder

srai.embedders.S2VecEmbedder

S2VecEmbedder(
    target_features: Union[list[str], OsmTagsFilter, GroupedOsmTagsFilter],
    count_subcategories: bool = True,
    batch_size: Optional[int] = 64,
    img_res: int = 8,
    patch_res: int = 12,
    num_heads: int = 8,
    encoder_layers: int = 6,
    decoder_layers: int = 2,
    embedding_dim: int = 256,
    decoder_dim: int = 128,
    mask_ratio: float = 0.75,
    dropout_prob: float = 0.2,
)

Bases: CountEmbedder

S2Vec Embedder.

PARAMETER DESCRIPTION
target_features

The features that are to be used in the embedding. Should be in "flat" format, i.e. "<super-tag>_<sub-tag>", or use OsmTagsFilter object.

TYPE: Union[List[str], OsmTagsFilter, GroupedOsmTagsFilter]

count_subcategories

Whether to count all subcategories individually or count features only on the highest level based on features column name. Defaults to True.

TYPE: bool DEFAULT: True

batch_size

Batch size. Defaults to 64.

TYPE: Optional[int] DEFAULT: 64

img_res

Image resolution. Defaults to 8.

TYPE: int DEFAULT: 8

patch_res

Patch resolution. Defaults to 12.

TYPE: int DEFAULT: 12

num_heads

Number of heads in the transformer. Defaults to 8.

TYPE: int DEFAULT: 8

encoder_layers

Number of encoder layers in the transformer. Defaults to 6.

TYPE: int DEFAULT: 6

decoder_layers

Number of decoder layers in the transformer. Defaults to 2.

TYPE: int DEFAULT: 2

embedding_dim

Embedding dimension. Defaults to 256.

TYPE: int DEFAULT: 256

decoder_dim

Decoder dimension. Defaults to 128.

TYPE: int DEFAULT: 128

mask_ratio

Mask ratio for the transformer. Defaults to 0.75.

TYPE: float DEFAULT: 0.75

dropout_prob

The dropout probability. Defaults to 0.2.

TYPE: float DEFAULT: 0.2

Source code in srai/embedders/s2vec/embedder.py
def __init__(
    self,
    target_features: Union[list[str], OsmTagsFilter, GroupedOsmTagsFilter],
    count_subcategories: bool = True,
    batch_size: Optional[int] = 64,
    img_res: int = 8,
    patch_res: int = 12,
    num_heads: int = 8,
    encoder_layers: int = 6,
    decoder_layers: int = 2,
    embedding_dim: int = 256,
    decoder_dim: int = 128,
    mask_ratio: float = 0.75,
    dropout_prob: float = 0.2,
) -> None:
    """
    Initialize S2Vec Embedder.

    Args:
        target_features (Union[List[str], OsmTagsFilter, GroupedOsmTagsFilter]): The features
            that are to be used in the embedding. Should be in "flat" format,
            i.e. "<super-tag>_<sub-tag>", or use OsmTagsFilter object.
        count_subcategories (bool, optional): Whether to count all subcategories individually
            or count features only on the highest level based on features column name.
            Defaults to True.
        batch_size (Optional[int], optional): Batch size. Defaults to 64.
        img_res (int, optional): Image resolution. Must not exceed `patch_res`.
            Defaults to 8.
        patch_res (int, optional): Patch resolution. The image side length is derived as
            `2 ** (patch_res - img_res)`. Defaults to 12.
        num_heads (int, optional): Number of heads in the transformer. Defaults to 8.
        encoder_layers (int, optional): Number of encoder layers in the transformer.
            Defaults to 6.
        decoder_layers (int, optional): Number of decoder layers in the transformer.
            Defaults to 2.
        embedding_dim (int, optional): Embedding dimension. Defaults to 256.
        decoder_dim (int, optional): Decoder dimension. Defaults to 128.
        mask_ratio (float, optional): Mask ratio for the transformer. Defaults to 0.75.
        dropout_prob (float, optional): The dropout probability. Defaults to 0.2.

    Raises:
        AssertionError: If `mask_ratio` or `dropout_prob` is outside the [0, 1] range.
        ValueError: If `patch_res` is smaller than `img_res`.
    """
    import_optional_dependencies(
        dependency_group="torch", modules=["torch", "pytorch_lightning", "timm"]
    )

    super().__init__(
        expected_output_features=target_features,
        count_subcategories=count_subcategories,
    )

    # NOTE: kept as asserts so the exception type seen by existing callers does not
    # change; be aware that they are skipped when Python runs with -O.
    assert 0.0 <= mask_ratio <= 1.0, "Mask ratio must be between 0 and 1."
    assert 0.0 <= dropout_prob <= 1.0, "Dropout probability must be between 0 and 1."

    # A negative exponent would silently turn img_size into a fractional float
    # (e.g. 2 ** -1 == 0.5), so reject that configuration up front.
    if patch_res < img_res:
        raise ValueError(
            f"patch_res ({patch_res}) must be greater than or equal to img_res ({img_res})."
        )

    # The model is created later; until then the embedder counts as not fitted.
    self._model: Optional[S2VecModel] = None
    self._is_fitted = False
    self._img_res = img_res
    self._patch_res = patch_res
    self.img_size = 2 ** (patch_res - img_res)
    self._num_heads = num_heads
    self._encoder_layers = encoder_layers
    self._decoder_layers = decoder_layers
    self._embedding_dim = embedding_dim
    self._decoder_dim = decoder_dim
    self._mask_ratio = mask_ratio
    self._dropout_prob = dropout_prob

    self._batch_size = batch_size

    # Starts empty, hence Optional.
    self._dataset: Optional[DataLoader] = None

fit

fit(
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    learning_rate: float = 0.001,
    trainer_kwargs: Optional[dict[str, Any]] = None,
) -> None

Fit the model to the data.

PARAMETER DESCRIPTION
regions_gdf

Region indexes and geometries.

TYPE: GeoDataFrame

features_gdf

Feature indexes, geometries and feature values.

TYPE: GeoDataFrame

learning_rate

Learning rate. Defaults to 0.001.

TYPE: float DEFAULT: 0.001

trainer_kwargs

Trainer kwargs. This is where the number of epochs can be set. Defaults to None.

TYPE: Optional[Dict[str, Any]] DEFAULT: None

Source code in srai/embedders/s2vec/embedder.py
def fit(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    learning_rate: float = 0.001,
    trainer_kwargs: Optional[dict[str, Any]] = None,
) -> None:
    """
    Train the model on the given regions and features.

    After training, the embedder is marked as fitted and the dataset that was
    built for training is kept on the instance.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.
        trainer_kwargs (Optional[Dict[str, Any]], optional): Trainer kwargs.
            This is where the number of epochs can be set. Defaults to None.
    """
    import pytorch_lightning as pl

    resolved_trainer_kwargs = self._prepare_trainer_kwargs(trainer_kwargs)

    # Training data is shuffled and prepared in fitting mode.
    prepared = self._prepare_dataset(  # type: ignore
        regions_gdf,
        features_gdf,
        self._batch_size,
        shuffle=True,
        is_fitting=True,
    )
    feature_counts, train_loader, train_dataset = prepared

    self._prepare_model(feature_counts, learning_rate)

    pl.Trainer(**resolved_trainer_kwargs).fit(self._model, train_loader)

    self._is_fitted = True
    self._dataset = train_dataset

fit_transform

fit_transform(
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    learning_rate: float = 0.001,
    trainer_kwargs: Optional[dict[str, Any]] = None,
) -> pd.DataFrame

Fit the model to the data and create region embeddings.

PARAMETER DESCRIPTION
regions_gdf

Region indexes and geometries.

TYPE: GeoDataFrame

features_gdf

Feature indexes, geometries and feature values.

TYPE: GeoDataFrame

learning_rate

Learning rate. Defaults to 0.001.

TYPE: float DEFAULT: 0.001

trainer_kwargs

Trainer kwargs. This is where the number of epochs can be set. Defaults to None.

TYPE: Optional[Dict[str, Any]] DEFAULT: None

Source code in srai/embedders/s2vec/embedder.py
def fit_transform(
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
    learning_rate: float = 0.001,
    trainer_kwargs: Optional[dict[str, Any]] = None,
) -> pd.DataFrame:
    """
    Fit the model to the data, then embed the same regions.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.
        learning_rate (float, optional): Learning rate. Defaults to 0.001.
        trainer_kwargs (Optional[Dict[str, Any]], optional): Trainer kwargs. This is where the
            number of epochs can be set. Defaults to None.

    Returns:
        pd.DataFrame: Region embeddings.
    """
    self.fit(
        regions_gdf=regions_gdf,
        features_gdf=features_gdf,
        learning_rate=learning_rate,
        trainer_kwargs=trainer_kwargs,
    )

    # fit() stores the dataset it trained on; reuse it for the embeddings.
    fitted_dataset = self._dataset
    assert fitted_dataset is not None  # for mypy
    return self._transform(dataset=fitted_dataset)

load

classmethod
load(path: Union[Path, str]) -> S2VecEmbedder

Load the model from a directory.

PARAMETER DESCRIPTION
path

Path to the directory.

TYPE: Union[Path, str]

model_module

Model class. Note: `load` does not accept this argument; it always passes `S2VecModel` to `_load` internally.

TYPE: type[ModelT]

RETURNS DESCRIPTION
S2VecEmbedder

S2VecEmbedder object.

TYPE: S2VecEmbedder

Source code in srai/embedders/s2vec/embedder.py
@classmethod
def load(cls, path: Union[Path, str]) -> "S2VecEmbedder":
    """
    Restore a saved S2VecEmbedder from a directory.

    Args:
        path (Union[Path, str]): Path to the directory.

    Returns:
        S2VecEmbedder: S2VecEmbedder object.
    """
    # The model class is fixed to S2VecModel, so callers only supply the path.
    restored_embedder = cls._load(path, S2VecModel)
    return restored_embedder

save

save(path: Union[str, Any]) -> None

Save the S2VecEmbedder model to a directory.

PARAMETER DESCRIPTION
path

Path to the directory.

TYPE: Union[str, Any]

Source code in srai/embedders/s2vec/embedder.py
def save(self, path: Union[str, Any]) -> None:
    """
    Save the S2VecEmbedder model to a directory.

    Two configuration dictionaries are collected and handed to `_save`: the
    constructor arguments of the embedder and the feature normalisation values.

    Args:
        path (Union[str, Any]): Path to the directory.
    """
    # Constructor arguments that are stored on the instance as `_<name>`.
    hyperparameter_names = (
        "batch_size",
        "img_res",
        "patch_res",
        "num_heads",
        "encoder_layers",
        "decoder_layers",
        "embedding_dim",
        "decoder_dim",
        "mask_ratio",
        "dropout_prob",
    )
    target_features_json = cast("pd.Series", self.expected_output_features).to_json(
        orient="records"
    )
    embedder_config = {
        "target_features": target_features_json,
        "count_subcategories": self.count_subcategories,
        **{name: getattr(self, f"_{name}") for name in hyperparameter_names},
    }

    # Array-like normalisation values are converted to plain lists.
    normalisation_names = ("feature_means", "feature_stds", "empty_features_mask")
    normalisation_config = {
        name: getattr(self, f"_{name}").tolist() for name in normalisation_names
    }

    self._save(path, embedder_config, normalisation_config)

transform

transform(
    regions_gdf: gpd.GeoDataFrame, features_gdf: gpd.GeoDataFrame
) -> pd.DataFrame

Create region embeddings.

PARAMETER DESCRIPTION
regions_gdf

Region indexes and geometries.

TYPE: GeoDataFrame

features_gdf

Feature indexes, geometries and feature values.

TYPE: GeoDataFrame

RETURNS DESCRIPTION
DataFrame

pd.DataFrame: Region embeddings.

Source code in srai/embedders/s2vec/embedder.py
def transform(  # type: ignore[override]
    self,
    regions_gdf: gpd.GeoDataFrame,
    features_gdf: gpd.GeoDataFrame,
) -> pd.DataFrame:
    """
    Embed regions with an already fitted model.

    The dataset built for this call replaces the one stored on the instance.

    Args:
        regions_gdf (gpd.GeoDataFrame): Region indexes and geometries.
        features_gdf (gpd.GeoDataFrame): Feature indexes, geometries and feature values.

    Returns:
        pd.DataFrame: Region embeddings.
    """
    self._check_is_fitted()

    # Prepared without shuffling and in non-fitting mode; the first element of the
    # result is not needed here.
    _unused, inference_loader, inference_dataset = self._prepare_dataset(
        regions_gdf,
        features_gdf,
        self._batch_size,
        shuffle=False,
        is_fitting=False,
    )
    self._dataset = inference_dataset

    return self._transform(dataset=self._dataset, dataloader=inference_loader)