Source code for botorch.models.transforms.outcome

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

r"""
Outcome transformations for automatically transforming and un-transforming
model outputs. Outcome transformations are typically part of a Model and
applied (i) within the model constructor to transform the train observations
to the model space, and (ii) in the ``Model.posterior`` call to untransform
the model posterior back to the original space.

References

.. [eriksson2021scalable]
    D. Eriksson, M. Poloczek. Scalable Constrained Bayesian Optimization.
    International Conference on Artificial Intelligence and Statistics. PMLR, 2021,
    http://proceedings.mlr.press/v130/eriksson21a.html

"""

from __future__ import annotations

import logging
import warnings
from abc import ABC, abstractmethod
from collections import OrderedDict

import torch
from botorch.models.transforms.utils import (
    nanstd,
    norm_to_lognorm_mean,
    norm_to_lognorm_variance,
)
from botorch.models.utils.assorted import get_task_value_remapping
from botorch.posteriors import GPyTorchPosterior, Posterior, TransformedPosterior
from botorch.posteriors.fully_bayesian import GaussianMixturePosterior
from botorch.utils.transforms import normalize_indices
from linear_operator.operators import (
    CholLinearOperator,
    DiagLinearOperator,
    TriangularLinearOperator,
)
from torch import Tensor
from torch.nn import Module, ModuleDict

logger: logging.Logger = logging.getLogger(__name__)



[docs]
class OutcomeTransform(Module, ABC):
    """Abstract base class for outcome transforms.

    Subclasses must implement ``forward``. The base implementations of
    ``subset_output``, ``untransform`` and ``untransform_posterior`` raise
    ``NotImplementedError``, and ``_is_linear`` defaults to ``False``.
    """


[docs]
    @abstractmethod
    def forward(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Map training targets (and optional noise) into the model space.

        This is an abstract placeholder; concrete transforms override it.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of training targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of observation noises
                associated with the training targets (if applicable).
            X: A ``batch_shape x n x d``-dim tensor of training inputs
                (if applicable).

        Returns:
            A two-tuple with the transformed outcomes:

            - The transformed outcome observations.
            - The transformed observation noise (if applicable).
        """
        ...  # pragma: no cover



[docs]
    def subset_output(self, idcs: list[int]) -> OutcomeTransform:
        r"""Restrict the transform to a subset of its outputs.

        The ``subset_model`` functionality uses this to treat outcome
        transformations properly. The base class provides no implementation.

        Args:
            idcs: The output indices to subset the transform to.

        Returns:
            The current outcome transform, subset to the specified output indices.

        Raises:
            NotImplementedError: Always, unless a subclass overrides the method.
        """
        cls_name = self.__class__.__name__
        message = f"{cls_name} does not implement the `subset_output` method"
        raise NotImplementedError(message)



[docs]
    def untransform(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Map previously transformed outcomes back to the original space.

        The base class provides no implementation.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of transformed training targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of transformed observation
                noises associated with the training targets (if applicable).
            X: A ``batch_shape x n x d``-dim tensor of training inputs
                (if applicable).

        Returns:
            A two-tuple with the un-transformed outcomes:

            - The un-transformed outcome observations.
            - The un-transformed observation noise (if applicable).

        Raises:
            NotImplementedError: Always, unless a subclass overrides the method.
        """
        cls_name = self.__class__.__name__
        message = f"{cls_name} does not implement the `untransform` method"
        raise NotImplementedError(message)


    @property
    def _is_linear(self) -> bool:
        """
        True for transformations such as ``Standardize``; these should be able to apply
        ``untransform_posterior`` to a GPyTorchPosterior and return a GPyTorchPosterior,
        because a multivariate normal distribution should remain multivariate normal
        after applying the transform.
        """
        return False


[docs]
    def untransform_posterior(
        self, posterior: Posterior, X: Tensor | None = None
    ) -> Posterior:
        r"""Map a posterior from the transformed space to the original space.

        Transforms with ``_is_linear=True`` should return a ``GPyTorchPosterior``
        when ``posterior`` is a ``GPyTorchPosterior``; transforms with
        ``_is_linear=False`` likely return a ``TransformedPosterior`` instead.
        The base class provides no implementation.

        Args:
            posterior: A posterior in the transformed space.
            X: A ``batch_shape x n x d``-dim tensor of training inputs
                (if applicable).

        Returns:
            The un-transformed posterior.

        Raises:
            NotImplementedError: Always, unless a subclass overrides the method.
        """
        message = (
            f"{self.__class__.__name__} does not implement the "
            "`untransform_posterior` method"
        )
        raise NotImplementedError(message)





[docs]
class ChainedOutcomeTransform(OutcomeTransform, ModuleDict):
    r"""An outcome transform representing the chaining of individual transforms.

    ``forward`` applies the component transforms in the order in which they are
    stored; ``untransform`` and ``untransform_posterior`` apply them in
    reverse order.
    """

    def __init__(self, **transforms: OutcomeTransform) -> None:
        r"""Build a chain from named outcome transforms.

        Args:
            transforms: The transforms to chain, given as keyword arguments.
                The keyword names become the keys under which the individual
                transforms are stored on the module; keyword order is kept.
        """
        named_transforms = OrderedDict(transforms)
        super().__init__(named_transforms)


[docs]
    def forward(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Apply every transform of the chain, first to last.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of training targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of observation noises
                associated with the training targets (if applicable).
            X: A ``batch_shape x n x d``-dim tensor of training inputs
                (if applicable). The same ``X`` is handed to every transform.

        Returns:
            A two-tuple with the transformed outcomes:

            - The transformed outcome observations.
            - The transformed observation noise (if applicable).
        """
        y_cur, yvar_cur = Y, Yvar
        for stage in self.values():
            # Each stage consumes the output of the previous one.
            y_cur, yvar_cur = stage.forward(Y=y_cur, Yvar=yvar_cur, X=X)
        return y_cur, yvar_cur



[docs]
    def subset_output(self, idcs: list[int]) -> OutcomeTransform:
        r"""Subset every transform of the chain along the output dimension.

        Args:
            idcs: The output indices to subset the transform to.

        Returns:
            A new chain of the same class whose components are the subset
            versions of the current components, stored under the same names.
        """
        subset_transforms = {}
        for key, stage in self.items():
            subset_transforms[key] = stage.subset_output(idcs=idcs)
        return self.__class__(**subset_transforms)



[docs]
    def untransform(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Undo every transform of the chain, last to first.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of transformed training targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of transformed observation
                noises associated with the training targets (if applicable).
            X: A ``batch_shape x n x d``-dim tensor of training inputs
                (if applicable). The same ``X`` is handed to every transform.

        Returns:
            A two-tuple with the un-transformed outcomes:

            - The un-transformed outcome observations.
            - The un-transformed observation noise (if applicable).
        """
        y_cur, yvar_cur = Y, Yvar
        # Invert in the opposite order to the one used by ``forward``.
        for stage in reversed(self.values()):
            y_cur, yvar_cur = stage.untransform(Y=y_cur, Yvar=yvar_cur, X=X)
        return y_cur, yvar_cur


    @property
    def _is_linear(self) -> bool:
        """
        A ``ChainedOutcomeTransform`` is linear only if all of the component transforms
        are linear.
        """
        return all(octf._is_linear for octf in self.values())


[docs]
    def untransform_posterior(
        self, posterior: Posterior, X: Tensor | None = None
    ) -> Posterior:
        r"""Un-transform a posterior through the chain, last transform first.

        Args:
            posterior: A posterior in the transformed space.
            X: A ``batch_shape x n x d``-dim tensor of training inputs
                (if applicable). The same ``X`` is handed to every transform.

        Returns:
            The un-transformed posterior.
        """
        current = posterior
        for stage in reversed(self.values()):
            current = stage.untransform_posterior(current, X=X)
        return current





[docs]
class Standardize(OutcomeTransform):
    r"""Standardize outcomes (zero mean, unit variance).

    This module is stateful: If in train mode, calling forward updates the
    module state (i.e. the mean/std normalizing constants). If in eval mode,
    calling forward simply applies the standardization using the current module
    state.

    The normalizing constants are kept in the ``means``, ``stdvs`` and
    ``_stdvs_sq`` buffers; the ``_is_trained`` buffer records whether they have
    been computed from data.
    """

    def __init__(
        self,
        m: int,
        outputs: list[int] | None = None,
        batch_shape: torch.Size = torch.Size(),  # noqa: B008
        min_stdv: float = 1e-8,
    ) -> None:
        r"""Set up an untrained standardization transform.

        Args:
            m: The output dimension.
            outputs: Which of the outputs to standardize. If omitted, all
                outputs will be standardized.
            batch_shape: The batch_shape of the training targets.
            min_stdv: The minimum standard deviation for which to perform
                standardization (if lower, only de-mean the data).
        """
        super().__init__()
        stats_shape = (*batch_shape, 1, m)
        # Start from the identity transform (zero mean, unit scale).
        self.register_buffer("means", torch.zeros(stats_shape))
        self.register_buffer("stdvs", torch.ones(stats_shape))
        self.register_buffer("_stdvs_sq", torch.ones(stats_shape))
        self.register_buffer("_is_trained", torch.tensor(False))
        self._m = m
        self._batch_shape = batch_shape
        self._min_stdv = min_stdv
        self._outputs = normalize_indices(outputs, d=m)

    def _get_per_input_means_stdvs(
        self, X: Tensor, include_stdvs_sq: bool
    ) -> tuple[Tensor, Tensor, Tensor | None]:
        r"""Get per-input means and stdvs.

        Args:
            X: A ``batch_shape x n x d``-dim tensor of input parameters.
            include_stdvs_sq: Whether to include the stdvs squared.
                This parameter is not used by this method

        Returns:
            A three-tuple with the  means and stdvs:

            - The per-input means.
            - The per-input stdvs.
            - The per-input stdvs squared.
        """
        return self.means, self.stdvs, self._stdvs_sq

    def _validate_training_inputs(self, Y: Tensor, Yvar: Tensor | None = None) -> None:
        """Validate training inputs.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of training targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of observation noises.
        """
        if Y.shape[:-2] != self._batch_shape:
            raise RuntimeError(
                f"Expected Y.shape[:-2] to be {self._batch_shape}, matching "
                f"the `batch_shape` argument to `{self.__class__.__name__}`, but got "
                f"Y.shape[:-2]={Y.shape[:-2]}."
            )
        elif Y.shape[-2] < 1:
            raise ValueError(f"Can't standardize with no observations. {Y.shape=}.")
        elif Y.size(-1) != self._m:
            raise RuntimeError(
                f"Wrong output dimension. Y.size(-1) is {Y.size(-1)}; expected "
                f"{self._m}."
            )


[docs]
    def forward(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Standardize outcomes.

        In train mode the normalizing constants are first re-estimated from
        ``Y`` and stored on the module; in eval mode the stored constants are
        applied unchanged.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of training targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of observation noises
                associated with the training targets (if applicable).
            X: A ``batch_shape x n x d``-dim tensor of training inputs
                (if applicable). It is only forwarded to
                ``_get_per_input_means_stdvs``.

        Returns:
            A two-tuple with the transformed outcomes:

            - The transformed outcome observations.
            - The transformed observation noise (if applicable).
        """
        if self.training:
            self._validate_training_inputs(Y=Y, Yvar=Yvar)
            if Y.shape[-2] != 1:
                scale = nanstd(X=Y, dim=-2, keepdim=True)
            else:
                # With a single observation use unit scale instead of
                # estimating a standard deviation.
                scale = torch.ones(
                    (*Y.shape[:-2], 1, Y.shape[-1]), dtype=Y.dtype, device=Y.device
                )
            # Entries that are not >= min_stdv (including NaN) become 1, so
            # the corresponding outputs are only de-meaned.
            scale = scale.where(scale >= self._min_stdv, torch.full_like(scale, 1.0))
            center = Y.nanmean(dim=-2, keepdim=True)
            if self._outputs is not None:
                # Outputs that were not selected keep the identity transform.
                skipped = [i for i in range(self._m) if i not in self._outputs]
                center[..., skipped] = 0.0
                scale[..., skipped] = 1.0
            self.means = center
            self.stdvs = scale
            self._stdvs_sq = scale.pow(2)
            self._is_trained = torch.tensor(True)
        has_noise = Yvar is not None
        means, stdvs, stdvs_sq = self._get_per_input_means_stdvs(
            X=X, include_stdvs_sq=has_noise
        )
        Y_tf = (Y - means) / stdvs
        if has_noise:
            Yvar_tf = Yvar / stdvs_sq
        else:
            Yvar_tf = None
        return Y_tf, Yvar_tf



[docs]
    def subset_output(self, idcs: list[int]) -> OutcomeTransform:
        r"""Subset the transform along the output dimension.

        The returned transform has ``len(idcs)`` outputs. Its normalizing
        constants are the selected columns of the current ones, and its
        train/eval mode and trained flag mirror the current transform.

        Args:
            idcs: The output indices to subset the transform to.

        Returns:
            The current outcome transform, subset to the specified output indices.

        Raises:
            RuntimeError: If more indices are requested than the transform
                has outputs.
        """
        new_m = len(idcs)
        if new_m > self._m:
            raise RuntimeError(
                "Trying to subset a transform have more outputs than "
                " the original transform."
            )
        nlzd_idcs = normalize_indices(idcs, d=self._m)
        new_outputs = None
        if self._outputs is not None:
            # The new transform numbers its outputs 0..new_m-1, so record the
            # POSITION (within ``idcs``) of every kept output that is being
            # standardized rather than its index in the original transform.
            standardized = set(self._outputs)
            new_outputs = [
                pos for pos, old_idx in enumerate(nlzd_idcs) if old_idx in standardized
            ]
        new_tf = self.__class__(
            m=new_m,
            outputs=new_outputs,
            batch_shape=self._batch_shape,
            min_stdv=self._min_stdv,
        )
        new_tf.means = self.means[..., nlzd_idcs]
        new_tf.stdvs = self.stdvs[..., nlzd_idcs]
        new_tf._stdvs_sq = self._stdvs_sq[..., nlzd_idcs]
        new_tf._is_trained = self._is_trained
        if not self.training:
            new_tf.eval()
        return new_tf



[docs]
    def untransform(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Un-standardize outcomes.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of standardized targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of standardized observation
                noises associated with the targets (if applicable).
            X: A ``batch_shape x n x d``-dim tensor of inputs (if applicable).
                It is only forwarded to ``_get_per_input_means_stdvs``.

        Returns:
            A two-tuple with the un-standardized outcomes:

            - The un-standardized outcome observations.
            - The un-standardized observation noise (if applicable).

        Raises:
            RuntimeError: If the normalizing constants have not been computed
                yet.
        """
        if not self._is_trained:
            raise RuntimeError(
                "`Standardize` transforms must be called on outcome data "
                "(e.g. `transform(Y)`) before calling `untransform`, since "
                "means and standard deviations need to be computed."
            )
        has_noise = Yvar is not None
        means, stdvs, stdvs_sq = self._get_per_input_means_stdvs(
            X=X, include_stdvs_sq=has_noise
        )
        Y_utf = means + stdvs * Y
        if has_noise:
            # Variances scale with the square of the standard deviation.
            Yvar_utf = stdvs_sq * Yvar
        else:
            Yvar_utf = None
        return Y_utf, Yvar_utf


    @property
    def _is_linear(self) -> bool:
        return True


[docs]
    def untransform_posterior(
        self, posterior: Posterior, X: Tensor | None = None
    ) -> GPyTorchPosterior | TransformedPosterior:
        r"""Un-standardize the posterior.

        Args:
            posterior: A posterior in the standardized space.
            X: A ``batch_shape x n x d``-dim tensor of inputs (if applicable).
                It is only forwarded to ``_get_per_input_means_stdvs``, which
                ignores it in this class but may use it in subclasses.

        Returns:
            The un-standardized posterior. If the input posterior is a
            ``GPyTorchPosterior`` or ``GaussianMixturePosterior``, return
            the same type with analytically rescaled distribution. Otherwise,
            return a ``TransformedPosterior``.

        Raises:
            NotImplementedError: If only a subset of the outputs is
                standardized.
            RuntimeError: If the normalizing constants have not been computed
                yet, or if the output dimension of the posterior does not
                match the transform (multi-task posteriors are exempt).
        """
        if self._outputs is not None:
            raise NotImplementedError(
                "Standardize does not yet support output selection for "
                "untransform_posterior"
            )
        if not self._is_trained:
            raise RuntimeError(
                "`Standardize` transforms must be called on outcome data "
                "(e.g. `transform(Y)`) before calling `untransform_posterior`, since "
                "means and standard deviations need to be computed."
            )
        is_mtgp_posterior = False
        if type(posterior) in (GPyTorchPosterior, GaussianMixturePosterior):
            is_mtgp_posterior = posterior._is_mt
        if not self._m == posterior._extended_shape()[-1] and not is_mtgp_posterior:
            raise RuntimeError(
                "Incompatible output dimensions encountered. Transform has output "
                f"dimension {self._m} and posterior has "
                f"{posterior._extended_shape()[-1]}."
            )

        if type(posterior) not in (GPyTorchPosterior, GaussianMixturePosterior):
            # fall back to TransformedPosterior
            # this applies to subclasses of GPyTorchPosterior like MultitaskGPPosterior
            # Obtain the constants through the same hook as the analytic branch
            # below, so that subclasses overriding
            # ``_get_per_input_means_stdvs`` (per-input constants that depend
            # on ``X``) are honored here as well instead of the raw buffers.
            means, stdvs, stdvs_sq = self._get_per_input_means_stdvs(
                X=X, include_stdvs_sq=True
            )
            return TransformedPosterior(
                posterior=posterior,
                sample_transform=lambda s: means + stdvs * s,
                mean_transform=lambda m, v: means + stdvs * m,
                variance_transform=lambda m, v: stdvs_sq * v,
            )
        # GPyTorchPosterior (TODO: Should we Lazy-evaluate the mean here as well?)
        mvn = posterior.distribution
        offset, scale_fac, _ = self._get_per_input_means_stdvs(
            X=X, include_stdvs_sq=False
        )
        if not posterior._is_mt:
            # Drop the trailing dimension of the constants before combining
            # them with the mean of the distribution.
            mean_tf = offset.squeeze(-1) + scale_fac.squeeze(-1) * mvn.mean
            scale_fac = scale_fac.squeeze(-1).expand_as(mean_tf)
        else:
            mean_tf = offset + scale_fac * mvn.mean
            reps = mean_tf.shape[-2:].numel() // scale_fac.size(-1)
            scale_fac = scale_fac.squeeze(-2)
            # Lay the scale factors out in the same order as the flattened
            # covariance: tiled when interleaved, repeated per entry otherwise.
            if mvn._interleaved:
                scale_fac = scale_fac.repeat(*[1 for _ in scale_fac.shape[:-1]], reps)
            else:
                scale_fac = torch.repeat_interleave(scale_fac, reps, dim=-1)

        if (
            not mvn.islazy
            or mvn._MultivariateNormal__unbroadcasted_scale_tril is not None
        ):
            # if already computed, we can save a lot of time using scale_tril
            covar_tf = CholLinearOperator(
                TriangularLinearOperator(mvn.scale_tril * scale_fac.unsqueeze(-1))
            )
        else:
            # Rescale the lazy covariance as D @ K @ D with D = diag(scale_fac).
            lcv = mvn.lazy_covariance_matrix
            scale_fac = scale_fac.expand(lcv.shape[:-1])
            scale_mat = DiagLinearOperator(scale_fac)
            covar_tf = scale_mat @ lcv @ scale_mat

        kwargs = {"interleaved": mvn._interleaved} if posterior._is_mt else {}
        mvn_tf = mvn.__class__(mean=mean_tf, covariance_matrix=covar_tf, **kwargs)
        return type(posterior)(mvn_tf)





[docs]
class StratifiedStandardize(Standardize):
    r"""Standardize outcomes (zero mean, unit variance) along stratification dimension.

    Separate normalizing constants are kept for each stratum; the stratum of
    an observation is read from one column of the inputs ``X``. ``forward``,
    ``untransform`` and ``untransform_posterior`` therefore raise a
    ``ValueError`` when ``X`` is not provided.

    This module is stateful: If in train mode, calling forward updates the
    module state (i.e. the mean/std normalizing constants). If in eval mode,
    calling forward simply applies the standardization using the current module
    state.
    """

    def __init__(
        self,
        stratification_idx: int,
        all_task_values: Tensor,
        batch_shape: torch.Size = torch.Size(),  # noqa: B008
        min_stdv: float = 1e-8,
        dtype: torch.dtype = torch.double,
    ) -> None:
        r"""Set up an untrained, per-stratum standardization transform.

        Note: This currently only supports single output models
        (including multi-task models that have a single output).

        Args:
            stratification_idx: The index of the stratification dimension in the
                input tensor X.
            all_task_values: ``t``-dim tensor of all possible task values that could
                appear in the dataset.
            batch_shape: The batch_shape of the training targets.
            min_stdv: The minimum standard deviation for which to perform
                standardization (if lower, only de-mean the data).
            dtype: The data type for internal computations.
        """
        # Initialize as a plain module: ``Standardize.__init__`` is skipped on
        # purpose because the buffers registered below have a different shape
        # (one row per stratum).
        OutcomeTransform.__init__(self)
        self._stratification_idx = stratification_idx
        self._min_stdv = min_stdv
        self._batch_shape = batch_shape
        self._m = 1  # TODO: support multiple outputs
        self._outputs = None
        task_values = all_task_values.unique(sorted=True)
        remapping = get_task_value_remapping(
            all_task_values=task_values,
            dtype=dtype,
        )
        # When no remapping is returned, use the sorted unique task values.
        self.strata_mapping = task_values if remapping is None else remapping
        n_strata = self.strata_mapping.shape[0]
        stats_shape = (*batch_shape, n_strata, 1)
        self.register_buffer("means", torch.zeros(stats_shape))
        self.register_buffer("stdvs", torch.ones(stats_shape))
        self.register_buffer("_stdvs_sq", torch.ones(stats_shape))
        self.register_buffer("_is_trained", torch.tensor(False))

[docs]
    def forward(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Standardize outcomes.

        If the module is in train mode, this updates the module state (i.e. the
        mean/std normalizing constants) separately for every stratum that
        occurs in ``X``. If the module is in eval mode, simply applies the
        normalization using the module state.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of training targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of observation noises
                associated with the training targets (if applicable).
            X: A ``batch_shape x n x d``-dim tensor of input parameters.

        Returns:
            A two-tuple with the transformed outcomes:

            - The transformed outcome observations.
            - The transformed observation noise (if applicable).

        Raises:
            ValueError: If ``X`` is not provided.
        """
        if X is None:
            raise ValueError("X is required for StratifiedStandardize.")
        if self.training:
            self._validate_training_inputs(Y=Y, Yvar=Yvar)
            self.means = self.means.to(dtype=X.dtype, device=X.device)
            self.stdvs = self.stdvs.to(dtype=X.dtype, device=X.device)
            self._stdvs_sq = self._stdvs_sq.to(dtype=X.dtype, device=X.device)
            strata = X[..., self._stratification_idx].long()
            unique_strata = strata.unique()
            for s in unique_strata:
                mapped_strata = self.strata_mapping[s].long()
                # Blank out observations of other strata with NaN so that the
                # NaN-aware statistics only see the current stratum.
                mask = strata != s
                Y_strata = Y.clone()
                Y_strata[..., mask, :] = float("nan")
                stdvs = (
                    torch.ones_like(Y_strata)
                    if Y.shape[-2] == 1
                    else nanstd(X=Y_strata, dim=-2)
                )
                stdvs = stdvs.where(
                    stdvs >= self._min_stdv, torch.full_like(stdvs, 1.0)
                )
                means = Y_strata.nanmean(dim=-2)
                self.means[..., mapped_strata, :] = means
                self.stdvs[..., mapped_strata, :] = stdvs
                self._stdvs_sq[..., mapped_strata, :] = stdvs.pow(2)
            self._is_trained = torch.tensor(True)
        # The constants were updated above, so run the parent implementation
        # in eval mode to only apply them. Restore the mode in a ``finally``
        # block so that an exception cannot leave the module stuck in eval mode.
        training = self.training
        self.training = False
        try:
            tf_Y, tf_Yvar = super().forward(Y=Y, Yvar=Yvar, X=X)
        finally:
            self.training = training
        return tf_Y, tf_Yvar


    def _get_per_input_means_stdvs(
        self, X: Tensor, include_stdvs_sq: bool
    ) -> tuple[Tensor, Tensor, Tensor | None]:
        r"""Get per-input means and stdvs.

        Args:
            X: A ``batch_shape x n x d``-dim tensor of input parameters.
            include_stdvs_sq: Whether to include the stdvs squared.

        Returns:
            A three-tuple with the per-input means and stdvs:

            - The per-input means.
            - The per-input stdvs.
            - The per-input stdvs squared.
        """
        strata = X[..., self._stratification_idx].long()
        mapped_strata_float = self.strata_mapping[strata]
        # Check for unobserved tasks (mapped to NaN) and warn
        unobserved_mask = torch.isnan(mapped_strata_float)
        if unobserved_mask.any():
            warnings.warn(
                "Predictions are being made for tasks that were not observed "
                "during training. These tasks will use an identity transform "
                "(mean=0, stdv=1).",
                stacklevel=3,
            )
            # Map unobserved tasks to index 0 temporarily for gather operation
            mapped_strata_float = mapped_strata_float.clone()
            mapped_strata_float[unobserved_mask] = 0.0
        mapped_strata = mapped_strata_float.unsqueeze(-1).long()
        # get means and stdvs for each strata
        n_extra_batch_dims = mapped_strata.ndim - 2 - len(self._batch_shape)
        expand_shape = mapped_strata.shape[:n_extra_batch_dims] + self.means.shape
        means = torch.gather(
            input=self.means.expand(expand_shape),
            dim=-2,
            index=mapped_strata,
        )
        stdvs = torch.gather(
            input=self.stdvs.expand(expand_shape),
            dim=-2,
            index=mapped_strata,
        )
        # Apply identity transform (mean=0, stdv=1) for unobserved tasks
        if unobserved_mask.any():
            unobserved_mask_expanded = unobserved_mask.unsqueeze(-1)
            means = means.clone()
            stdvs = stdvs.clone()
            means[unobserved_mask_expanded] = 0.0
            stdvs[unobserved_mask_expanded] = 1.0
        if include_stdvs_sq:
            stdvs_sq = torch.gather(
                input=self._stdvs_sq.expand(expand_shape),
                dim=-2,
                index=mapped_strata,
            )
            if unobserved_mask.any():
                stdvs_sq = stdvs_sq.clone()
                stdvs_sq[unobserved_mask_expanded] = 1.0
        else:
            stdvs_sq = None
        return means, stdvs, stdvs_sq


[docs]
    def subset_output(self, idcs: list[int]) -> OutcomeTransform:
        r"""Subset the transform along the output dimension.

        Not supported by this transform.

        Args:
            idcs: The output indices to subset the transform to.

        Returns:
            Never returns.

        Raises:
            NotImplementedError: Always.
        """
        # Same exception type as before, now with a message that names the
        # class and the unsupported method.
        raise NotImplementedError(
            f"{self.__class__.__name__} does not implement the `subset_output` method"
        )



[docs]
    def untransform(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Un-standardize outcomes using the constants of each input's stratum.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of standardized targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of standardized observation
                noises associated with the targets (if applicable).
            X: A ``batch_shape x n x d``-dim tensor of input parameters.

        Returns:
            A two-tuple with the un-standardized outcomes:

            - The un-standardized outcome observations.
            - The un-standardized observation noise (if applicable).

        Raises:
            ValueError: If ``X`` is not provided.
        """
        if X is not None:
            return super().untransform(Y=Y, Yvar=Yvar, X=X)
        raise ValueError("X is required for StratifiedStandardize.")



[docs]
    def untransform_posterior(
        self, posterior: Posterior, X: Tensor | None = None
    ) -> GPyTorchPosterior | TransformedPosterior:
        r"""Un-standardize the posterior.

        Args:
            posterior: A posterior in the standardized space.
            X: A ``batch_shape x n x d``-dim tensor of inputs. Required.

        Returns:
            The un-standardized posterior. If the input posterior is a
            ``GPyTorchPosterior`` or ``GaussianMixturePosterior``, return
            the same type with analytically rescaled distribution. Otherwise,
            return a ``TransformedPosterior``.

        Raises:
            ValueError: If ``X`` is not provided.
        """
        if X is not None:
            return super().untransform_posterior(posterior=posterior, X=X)
        raise ValueError("X is required for StratifiedStandardize.")





[docs]
class Log(OutcomeTransform):
    r"""Log-transform outcomes.

    Useful if the targets are modeled using a (multivariate) log-Normal
    distribution. This means that we can use a standard GP model on the
    log-transformed outcomes and un-transform the model posterior of that GP.

    When observation noise is provided, the variance is transformed using the
    delta method approximation: Var[log(Y)] ≈ Var[Y] / Y^2. This assumes that
    the observation noise is Gaussian in the log-transformed space, which
    corresponds to log-normal observation noise in the original space.
    """

    def __init__(self, outputs: list[int] | None = None) -> None:
        r"""Log-transform outcomes.

        Args:
            outputs: Which of the outputs to log-transform. If omitted, all
                outputs will be log-transformed. Note that ``subset_output``
                rejects negative indices when this is specified.
        """
        super().__init__()
        # Stored as given; ``None`` means "transform every output". The indices
        # are resolved against the actual number of outputs on each call.
        self._outputs = outputs


[docs]
    def subset_output(self, idcs: list[int]) -> OutcomeTransform:
        r"""Subset the transform along the output dimension.

        If only some outputs are log-transformed, the returned transform's
        indices are expressed relative to the subset: entry ``k`` of ``idcs``
        becomes output ``k`` of the subset, and it is transformed if and only
        if ``idcs[k]`` was transformed by this transform.

        Args:
            idcs: The output indices to subset the transform to.

        Returns:
            A new transform of the same class, restricted to the specified
            output indices and in the same train / eval mode as this one.

        Raises:
            NotImplementedError: If only some outputs are transformed and any
                of the stored output indices or ``idcs`` is negative.
        """
        new_outputs = None
        if self._outputs is not None:
            # ``any`` also copes with empty index collections and with ``idcs``
            # being a tuple, both of which break ``min(list + list)``.
            if any(i < 0 for i in (*self._outputs, *idcs)):
                raise NotImplementedError(
                    f"Negative indexing not supported for {self.__class__.__name__} "
                    "when subsetting outputs and only transforming some outputs."
                )
            # Re-index into the subset: keeping the original index values would
            # point at the wrong (or a non-existent) column of the subset.
            transformed = set(self._outputs)
            new_outputs = [pos for pos, i in enumerate(idcs) if i in transformed]
        new_tf = self.__class__(outputs=new_outputs)
        if not self.training:
            new_tf.eval()
        return new_tf



[docs]
    def forward(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Map outcomes (and optionally their noise) into log space.

        The natural logarithm is applied to the selected outputs; outputs that
        are not selected are passed through unchanged. Observation noise, when
        given, is propagated with the delta method
        ``Var[log(Y)] ≈ Var[Y] / Y^2``, where ``Y`` is clamped from below at
        ``1e-8`` before squaring.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of training targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of observation noises
                associated with the training targets (if applicable).
            X: A ``batch_shape x n x d``-dim tensor of training inputs (if applicable).
                This argument is not used by this transform.

        Returns:
            A two-tuple with the transformed outcomes:

            - The transformed outcome observations.
            - The transformed observation noise (``None`` if ``Yvar`` is ``None``).
        """
        log_Y = torch.log(Y)
        num_outputs = Y.size(-1)
        selected = normalize_indices(self._outputs, d=num_outputs)
        if selected is not None:
            # Rebuild the last dimension column by column, taking the logged
            # column only for the selected outputs.
            columns = []
            for j in range(num_outputs):
                source = log_Y if j in selected else Y
                columns.append(source[..., j])
            log_Y = torch.stack(columns, dim=-1)
        if Yvar is None:
            return log_Y, None
        # Delta method: Var[log(Y)] ≈ Var[Y] / Y^2
        log_var = Yvar / Y.clamp(min=1e-8).pow(2)
        if selected is None:
            return log_Y, log_var
        var_columns = []
        for j in range(num_outputs):
            source = log_var if j in selected else Yvar
            var_columns.append(source[..., j])
        return log_Y, torch.stack(var_columns, dim=-1)



[docs]
    def untransform(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Map log-space outcomes (and optionally their noise) back.

        The selected outputs are exponentiated; outputs that are not selected
        are passed through unchanged. Observation noise, when given, is mapped
        back by reversing the delta method, i.e. it is multiplied by
        ``exp(2 * Y)``. This is an approximation, and a debug message is
        logged whenever it is applied.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of log-transformed targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of log-transformed
                observation noises associated with the training targets
                (if applicable).
            X: A ``batch_shape x n x d``-dim tensor of inputs (if applicable).
                This argument is not used by this transform.

        Returns:
            A two-tuple with the un-transformed outcomes:

            - The exponentiated outcome observations.
            - The approximately un-transformed observation noise
              (``None`` if ``Yvar`` is ``None``).
        """
        exp_Y = torch.exp(Y)
        num_outputs = Y.size(-1)
        selected = normalize_indices(self._outputs, d=num_outputs)
        if selected is not None:
            # Only the selected columns come from the exponentiated tensor.
            exp_Y = torch.stack(
                [(exp_Y if j in selected else Y)[..., j] for j in range(num_outputs)],
                dim=-1,
            )
        if Yvar is None:
            return exp_Y, None
        # Reverse of delta method: Var[Y] = Var[log(Y)] * Y^2
        # Since Y = exp(Y_log), this is Var[log(Y)] * exp(2 * Y_log)
        logger.debug(
            "Log.untransform: Reverse delta method for observation noise "
            "is a lossy operation. The untransformed variance is an "
            "approximation that may not exactly match the original variance."
        )
        orig_var = Yvar * torch.exp(2.0 * Y)
        if selected is None:
            return exp_Y, orig_var
        mixed_var = torch.stack(
            [(orig_var if j in selected else Yvar)[..., j] for j in range(num_outputs)],
            dim=-1,
        )
        return exp_Y, mixed_var



[docs]
    def untransform_posterior(
        self, posterior: Posterior, X: Tensor | None = None
    ) -> TransformedPosterior:
        r"""Wrap a log-space posterior so that it lives in the original space.

        Samples are exponentiated, and the mean and variance are converted
        with ``norm_to_lognorm_mean`` and ``norm_to_lognorm_variance``.

        Args:
            posterior: A posterior in the log-transformed space.
            X: A ``batch_shape x n x d``-dim tensor of inputs (if applicable).
                This argument is not used by this transform.

        Returns:
            The un-transformed posterior.

        Raises:
            NotImplementedError: If the transform was configured to act on a
                subset of the outputs only.
        """
        if self._outputs is None:
            return TransformedPosterior(
                posterior=posterior,
                sample_transform=torch.exp,
                mean_transform=norm_to_lognorm_mean,
                variance_transform=norm_to_lognorm_variance,
            )
        raise NotImplementedError(
            "Log does not yet support output selection for untransform_posterior"
        )





[docs]
class Power(OutcomeTransform):
    r"""Power-transform outcomes.

    Useful if the targets are modeled using a (multivariate) power transform of
    a Normal distribution. This means that we can use a standard GP model on the
    power-transformed outcomes and un-transform the model posterior of that GP.
    """

    def __init__(self, power: float, outputs: list[int] | None = None) -> None:
        r"""Power-transform outcomes.

        Args:
            power: The exponent that ``forward`` raises the outcomes to.
                ``untransform`` raises to ``1.0 / power``, so a value of zero
                makes the inverse fail with a division by zero.
            outputs: Which of the outputs to power-transform. If omitted, all
                outputs will be power-transformed. Note that ``subset_output``
                rejects negative indices when this is specified.
        """
        super().__init__()
        # Stored as given; ``None`` means "transform every output". The indices
        # are resolved against the actual number of outputs on each call.
        self._outputs = outputs
        self.power = power


[docs]
    def subset_output(self, idcs: list[int]) -> OutcomeTransform:
        r"""Subset the transform along the output dimension.

        If only some outputs are power-transformed, the returned transform's
        indices are expressed relative to the subset: entry ``k`` of ``idcs``
        becomes output ``k`` of the subset, and it is transformed if and only
        if ``idcs[k]`` was transformed by this transform.

        Args:
            idcs: The output indices to subset the transform to.

        Returns:
            A new transform of the same class and with the same ``power``,
            restricted to the specified output indices and in the same
            train / eval mode as this one.

        Raises:
            NotImplementedError: If only some outputs are transformed and any
                of the stored output indices or ``idcs`` is negative.
        """
        new_outputs = None
        if self._outputs is not None:
            # ``any`` also copes with empty index collections and with ``idcs``
            # being a tuple, both of which break ``min(list + list)``.
            if any(i < 0 for i in (*self._outputs, *idcs)):
                raise NotImplementedError(
                    f"Negative indexing not supported for {self.__class__.__name__} "
                    "when subsetting outputs and only transforming some outputs."
                )
            # Re-index into the subset: keeping the original index values would
            # point at the wrong (or a non-existent) column of the subset.
            transformed = set(self._outputs)
            new_outputs = [pos for pos, i in enumerate(idcs) if i in transformed]
        new_tf = self.__class__(power=self.power, outputs=new_outputs)
        if not self.training:
            new_tf.eval()
        return new_tf



[docs]
    def forward(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Raise the selected outcomes to ``self.power``.

        Outputs that are not selected are passed through unchanged.
        Transforming observation noise is not supported.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of training targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of observation noises
                associated with the training targets. Must be ``None``.
            X: A ``batch_shape x n x d``-dim tensor of training inputs (if applicable).
                This argument is not used by this transform.

        Returns:
            A two-tuple with the transformed outcomes:

            - The transformed outcome observations.
            - ``None`` (observation noise is not supported).

        Raises:
            NotImplementedError: If ``Yvar`` is not ``None``.
        """
        powered = Y.pow(self.power)
        num_outputs = Y.size(-1)
        selected = normalize_indices(self._outputs, d=num_outputs)
        if selected is not None:
            # Rebuild the last dimension column by column, taking the
            # transformed column only for the selected outputs.
            columns = []
            for j in range(num_outputs):
                source = powered if j in selected else Y
                columns.append(source[..., j])
            powered = torch.stack(columns, dim=-1)
        if Yvar is None:
            return powered, None
        # TODO: Delta method, possibly issue warning
        raise NotImplementedError(
            "Power does not yet support transforming observation noise"
        )



[docs]
    def untransform(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Invert the power transform on the selected outcomes.

        The selected outputs are raised to ``1.0 / self.power``; outputs that
        are not selected are passed through unchanged. Un-transforming
        observation noise is not supported.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of power-transformed targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of power-transformed
                observation noises associated with the training targets.
                Must be ``None``.
            X: A ``batch_shape x n x d``-dim tensor of inputs (if applicable).
                This argument is not used by this transform.

        Returns:
            A two-tuple with the un-transformed outcomes:

            - The un-power transformed outcome observations.
            - ``None`` (observation noise is not supported).

        Raises:
            NotImplementedError: If ``Yvar`` is not ``None``.
        """
        inverse_exponent = 1.0 / self.power
        restored = Y.pow(inverse_exponent)
        num_outputs = Y.size(-1)
        selected = normalize_indices(self._outputs, d=num_outputs)
        if selected is not None:
            # Only the selected columns come from the un-transformed tensor.
            restored = torch.stack(
                [
                    (restored if j in selected else Y)[..., j]
                    for j in range(num_outputs)
                ],
                dim=-1,
            )
        if Yvar is None:
            return restored, None
        # TODO: Delta method, possibly issue warning
        raise NotImplementedError(
            "Power does not yet support transforming observation noise"
        )



[docs]
    def untransform_posterior(
        self, posterior: Posterior, X: Tensor | None = None
    ) -> TransformedPosterior:
        r"""Wrap a power-space posterior so that samples are un-transformed.

        Only a sample transform is supplied to the returned
        ``TransformedPosterior``: samples are raised to ``1.0 / self.power``.

        Args:
            posterior: A posterior in the power-transformed space.
            X: A ``batch_shape x n x d``-dim tensor of inputs (if applicable).
                This argument is not used by this transform.

        Returns:
            The un-transformed posterior.

        Raises:
            NotImplementedError: If the transform was configured to act on a
                subset of the outputs only.
        """
        if self._outputs is None:

            def _invert_power(samples):
                # ``self.power`` is read at call time, not when wrapping.
                return samples.pow(1.0 / self.power)

            return TransformedPosterior(
                posterior=posterior, sample_transform=_invert_power
            )
        raise NotImplementedError(
            "Power does not yet support output selection for untransform_posterior"
        )





[docs]
class Bilog(OutcomeTransform):
    r"""Bilog-transform outcomes.

    The Bilog transform [eriksson2021scalable]_ is useful for modeling outcome
    constraints as it magnifies values near zero and flattens extreme values.
    """

    def __init__(self, outputs: list[int] | None = None) -> None:
        r"""Bilog-transform outcomes.

        Args:
            outputs: Which of the outputs to Bilog-transform. If omitted, all
                outputs will be transformed. Note that ``subset_output``
                rejects negative indices when this is specified.
        """
        super().__init__()
        # Stored as given; ``None`` means "transform every output". The indices
        # are resolved against the actual number of outputs on each call.
        self._outputs = outputs


[docs]
    def subset_output(self, idcs: list[int]) -> OutcomeTransform:
        r"""Subset the transform along the output dimension.

        If only some outputs are Bilog-transformed, the returned transform's
        indices are expressed relative to the subset: entry ``k`` of ``idcs``
        becomes output ``k`` of the subset, and it is transformed if and only
        if ``idcs[k]`` was transformed by this transform.

        Args:
            idcs: The output indices to subset the transform to.

        Returns:
            A new transform of the same class, restricted to the specified
            output indices and in the same train / eval mode as this one.

        Raises:
            NotImplementedError: If only some outputs are transformed and any
                of the stored output indices or ``idcs`` is negative.
        """
        new_outputs = None
        if self._outputs is not None:
            # ``any`` also copes with empty index collections and with ``idcs``
            # being a tuple, both of which break ``min(list + list)``.
            if any(i < 0 for i in (*self._outputs, *idcs)):
                raise NotImplementedError(
                    f"Negative indexing not supported for {self.__class__.__name__} "
                    "when subsetting outputs and only transforming some outputs."
                )
            # Re-index into the subset: keeping the original index values would
            # point at the wrong (or a non-existent) column of the subset.
            transformed = set(self._outputs)
            new_outputs = [pos for pos, i in enumerate(idcs) if i in transformed]
        new_tf = self.__class__(outputs=new_outputs)
        if not self.training:
            new_tf.eval()
        return new_tf



[docs]
    def forward(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Apply ``sign(y) * log(|y| + 1)`` to the selected outcomes.

        Outputs that are not selected are passed through unchanged.
        Transforming observation noise is not supported.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of training targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of observation noises
                associated with the training targets. Must be ``None``.
            X: A ``batch_shape x n x d``-dim tensor of training inputs (if applicable).
                This argument is not used by this transform.

        Returns:
            A two-tuple with the transformed outcomes:

            - The transformed outcome observations.
            - ``None`` (observation noise is not supported).

        Raises:
            NotImplementedError: If ``Yvar`` is not ``None``.
        """
        signs = Y.sign()
        log_magnitude = (Y.abs() + 1.0).log()
        bilog_Y = signs * log_magnitude
        num_outputs = Y.size(-1)
        selected = normalize_indices(self._outputs, d=num_outputs)
        if selected is not None:
            # Rebuild the last dimension column by column, taking the
            # transformed column only for the selected outputs.
            columns = []
            for j in range(num_outputs):
                source = bilog_Y if j in selected else Y
                columns.append(source[..., j])
            bilog_Y = torch.stack(columns, dim=-1)
        if Yvar is None:
            return bilog_Y, None
        raise NotImplementedError(
            "Bilog does not yet support transforming observation noise"
        )



[docs]
    def untransform(
        self, Y: Tensor, Yvar: Tensor | None = None, X: Tensor | None = None
    ) -> tuple[Tensor, Tensor | None]:
        r"""Invert the Bilog transform via ``sign(y) * expm1(|y|)``.

        The inverse is applied to the selected outputs; outputs that are not
        selected are passed through unchanged. Un-transforming observation
        noise is not supported.

        Args:
            Y: A ``batch_shape x n x m``-dim tensor of bilog-transformed targets.
            Yvar: A ``batch_shape x n x m``-dim tensor of bilog-transformed
                observation noises associated with the training targets.
                Must be ``None``.
            X: A ``batch_shape x n x d``-dim tensor of inputs (if applicable).
                This argument is not used by this transform.

        Returns:
            A two-tuple with the un-transformed outcomes:

            - The un-transformed outcome observations.
            - ``None`` (observation noise is not supported).

        Raises:
            NotImplementedError: If ``Yvar`` is not ``None``.
        """
        signs = Y.sign()
        magnitude = Y.abs().expm1()
        restored = signs * magnitude
        num_outputs = Y.size(-1)
        selected = normalize_indices(self._outputs, d=num_outputs)
        if selected is not None:
            # Only the selected columns come from the un-transformed tensor.
            restored = torch.stack(
                [
                    (restored if j in selected else Y)[..., j]
                    for j in range(num_outputs)
                ],
                dim=-1,
            )
        if Yvar is None:
            return restored, None
        # TODO: Delta method, possibly issue warning
        raise NotImplementedError(
            "Bilog does not yet support transforming observation noise"
        )



[docs]
    def untransform_posterior(
        self, posterior: Posterior, X: Tensor | None = None
    ) -> TransformedPosterior:
        r"""Wrap a bilog-space posterior so that samples are un-transformed.

        Only a sample transform is supplied to the returned
        ``TransformedPosterior``: samples are mapped through
        ``sign(x) * expm1(|x|)``.

        Args:
            posterior: A posterior in the bilog-transformed space.
            X: A ``batch_shape x n x d``-dim tensor of inputs (if applicable).
                This argument is not used by this transform.

        Returns:
            The un-transformed posterior.

        Raises:
            NotImplementedError: If the transform was configured to act on a
                subset of the outputs only.
        """
        if self._outputs is None:

            def _invert_bilog(samples):
                return samples.sign() * samples.abs().expm1()

            return TransformedPosterior(
                posterior=posterior, sample_transform=_invert_bilog
            )
        raise NotImplementedError(
            "Bilog does not yet support output selection for untransform_posterior"
        )