Source code for botorch.models.transforms.utils

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations

from functools import wraps

import torch
from torch import Tensor



[docs]
def lognorm_to_norm(mu: Tensor, Cov: Tensor) -> tuple[Tensor, Tensor]:
    """Compute mean and covariance of a MVN from those of the associated log-MVN

    If ``Y`` is log-normal with mean mu_ln and covariance Cov_ln, then
    ``X ~ N(mu_n, Cov_n)`` with

        Cov_n_{ij} = log(1 + Cov_ln_{ij} / (mu_ln_{i} * mu_n_{j}))
        mu_n_{i} = log(mu_ln_{i}) - 0.5 * log(1 + Cov_ln_{ii} / mu_ln_{i}**2)

    Args:
        mu: A ``batch_shape x n`` mean vector of the log-Normal distribution.
        Cov: A ``batch_shape x n x n`` covariance matrix of the log-Normal
            distribution.

    Returns:
        A two-tuple containing:

        - The ``batch_shape x n`` mean vector of the Normal distribution
        - The ``batch_shape x n x n`` covariance matrix of the Normal distribution
    """
    Cov_n = torch.log1p(Cov / (mu.unsqueeze(-1) * mu.unsqueeze(-2)))
    mu_n = torch.log(mu) - 0.5 * torch.diagonal(Cov_n, dim1=-1, dim2=-2)
    return mu_n, Cov_n




[docs]
def norm_to_lognorm(mu: Tensor, Cov: Tensor) -> tuple[Tensor, Tensor]:
    """Compute mean and covariance of a log-MVN from its MVN sufficient statistics

    If ``X ~ N(mu, Cov)`` and ``Y = exp(X)``, then ``Y`` is log-normal with

        mu_ln_{i} = exp(mu_{i} + 0.5 * Cov_{ii})
        Cov_ln_{ij} = exp(mu_{i} + mu_{j} + 0.5 * (Cov_{ii} + Cov_{jj})) *
        (exp(Cov_{ij}) - 1)

    Args:
        mu: A ``batch_shape x n`` mean vector of the Normal distribution.
        Cov: A ``batch_shape x n x n`` covariance matrix of the Normal distribution.

    Returns:
        A two-tuple containing:

        - The ``batch_shape x n`` mean vector of the log-Normal distribution.
        - The ``batch_shape x n x n`` covariance matrix of the log-Normal
            distribution.
    """
    diag = torch.diagonal(Cov, dim1=-1, dim2=-2)
    b = mu + 0.5 * diag
    mu_ln = torch.exp(b)
    Cov_ln = torch.special.expm1(Cov) * torch.exp(b.unsqueeze(-1) + b.unsqueeze(-2))
    return mu_ln, Cov_ln




[docs]
def norm_to_lognorm_mean(mu: Tensor, var: Tensor) -> Tensor:
    """Compute mean of a log-MVN from its MVN marginals

    Args:
        mu: A ``batch_shape x n`` mean vector of the Normal distribution.
        var: A ``batch_shape x n`` variance vectorof the Normal distribution.

    Returns:
        The ``batch_shape x n`` mean vector of the log-Normal distribution.
    """
    return torch.exp(mu + 0.5 * var)




[docs]
def norm_to_lognorm_variance(mu: Tensor, var: Tensor) -> Tensor:
    """Compute variance of a log-MVN from its MVN marginals

    Args:
        mu: A ``batch_shape x n`` mean vector of the Normal distribution.
        var: A ``batch_shape x n`` variance vectorof the Normal distribution.

    Returns:
        The ``batch_shape x n`` variance vector of the log-Normal distribution.
    """
    b = mu + 0.5 * var
    return torch.special.expm1(var) * torch.exp(2 * b)




[docs]
def expand_and_copy_tensor(X: Tensor, batch_shape: torch.Size) -> Tensor:
    r"""Expand and copy X according to batch_shape.

    Args:
        X: A ``input_batch_shape x n x d``-dim tensor of inputs.
        batch_shape: The new batch shape.

    Returns:
        A ``new_batch_shape x n x d``-dim tensor of inputs, where ``new_batch_shape``
        is ``input_batch_shape`` against ``batch_shape``.
    """
    try:
        batch_shape = torch.broadcast_shapes(X.shape[:-2], batch_shape)
    except RuntimeError:
        raise RuntimeError(
            f"Provided batch shape ({batch_shape}) and input batch shape "
            f"({X.shape[:-2]}) are not broadcastable."
        )
    expand_shape = batch_shape + X.shape[-2:]
    return X.expand(expand_shape).clone()




[docs]
def subset_transform(transform):
    r"""Decorator of an input transform function to separate out indexing logic."""

    @wraps(transform)
    def f(self, X: Tensor, **kwargs) -> Tensor:
        if not hasattr(self, "indices") or self.indices is None:
            return transform(self, X, **kwargs)
        has_shape = hasattr(self, "batch_shape")
        Y = expand_and_copy_tensor(X, self.batch_shape) if has_shape else X.clone()
        Y[..., self.indices] = transform(self, X[..., self.indices], **kwargs)
        return Y

    return f




[docs]
def interaction_features(X: Tensor) -> Tensor:
    """Computes the interaction features between the inputs.

    Args:
        X: A ``batch_shape x q x d``-dim tensor of inputs.
        indices: The input dimensions to generate interaction features for.

    Returns:
        A ``n x q x 1 x (d * (d-1) / 2))``-dim tensor of interaction features.
    """
    dim = X.shape[-1]
    row_idcs, col_idcs = torch.triu_indices(dim, dim, offset=1)
    return (X.unsqueeze(-1) @ X.unsqueeze(-2))[..., row_idcs, col_idcs].unsqueeze(-2)




[docs]
def nanstd(X: Tensor, dim: int, keepdim: bool = False) -> Tensor:
    """Computes the standard deviation of the input, ignoring NaNs.

    Args:
        X: A ``batch_shape x n x d``-dim tensor of inputs.
        dim: The dimension along which to compute the standard deviation.
        keepdim: If True, the dimension along which the standard deviation is
            compute is kept.
    """
    n = (~torch.isnan(X)).sum(dim=dim, keepdim=keepdim)
    return (
        (X - X.nanmean(dim=dim, keepdim=True)).pow(2).nanmean(dim=dim, keepdim=keepdim)
        * n
        / (n - 1)
    ).sqrt()




[docs]
def kumaraswamy_warp(X: Tensor, c0: Tensor, c1: Tensor, eps: float = 1e-8) -> Tensor:
    """Warp inputs through a Kumaraswamy CDF.

    This assumes that X is contained within the unit cube. This first
    normalizes inputs to [eps, 1-eps]^d (to ensure that no values are 0 or 1)
    and then applies passes those inputs through a Kumaraswamy CDF.

    Args:
        X: A ``batch_shape x n x d``-dim tensor of inputs.
        c0: A ``d``-dim tensor of the concentration0 parameter for the
            Kumaraswamy distribution.
        c1: A ``d``-dim tensor of the concentration1 parameter for the
            Kumaraswamy distribution.
        eps: A small value that is used to ensure inputs are not 0 or 1.

    Returns:
        A ``batch_shape x n x d``-dim tensor of warped inputs.
    """
    X_range = 1 - 2 * eps
    X = torch.clamp(X * X_range + eps, eps, 1.0 - eps)
    return 1 - torch.pow((1 - torch.pow(X, c1)), c0)




[docs]
def inv_kumaraswamy_warp(
    X: Tensor, c0: Tensor, c1: Tensor, eps: float = 1e-8
) -> Tensor:
    """Map warped inputs through an inverse Kumaraswamy CDF.

    This takes warped inputs (X) and transforms those via an inverse
    Kumaraswamy CDF. This then unnormalizes the inputs using bounds of
    [eps, 1-eps]^d and ensures that the values are within [0, 1]^d.

    Args:
        X: A ``batch_shape x n x d``-dim tensor of inputs.
        c0: A ``d``-dim tensor of the concentration0 parameter for the
            Kumaraswamy distribution.
        c1: A ``d``-dim tensor of the concentration1 parameter for the
            Kumaraswamy distribution.
        eps: A small value that is used to ensure inputs are not 0 or 1.

    Returns:
        A ``batch_shape x n x d``-dim tensor of untransformed inputs.
    """
    X_range = 1 - 2 * eps
    # unnormalize from [eps, 1-eps] to [0,1]
    untf_X = (1 - (1 - X).pow(1 / c0)).pow(1 / c1)
    return ((untf_X - eps) / X_range).clamp(0.0, 1.0)