Source code for botorch.acquisition.thompson_sampling

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.


import torch
from botorch.acquisition.analytic import AcquisitionFunction
from botorch.acquisition.objective import (
    IdentityMCObjective,
    MCAcquisitionObjective,
    PosteriorTransform,
)
from botorch.exceptions.errors import UnsupportedError
from botorch.models.deterministic import GenericDeterministicModel
from botorch.models.model import Model
from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
from botorch.utils.transforms import is_ensemble, t_batch_mode_transform
from torch import Tensor


# Message template for the ``ValueError`` raised in
# ``PathwiseThompsonSampling._pathwise_forward`` when the q-batch size of the
# input differs from the cached one. It is filled via ``str.format`` with two
# positional arguments: the cached batch size and the newly observed batch size.
BATCH_SIZE_CHANGE_ERROR = """The batch size of PathwiseThompsonSampling should \
not change during a forward pass - was {}, now {}. Please re-initialize the \
acquisition if you want to change the batch size."""



[docs]
class PathwiseThompsonSampling(AcquisitionFunction):
    r"""Single-outcome Thompson Sampling packaged as an (analytic)
    acquisition function. Querying the acquisition function gives the summed
    values of one or more pathwise-drawn posterior samples, and thus
    its maximization yields one (or multiple) Thompson sample(s).

    Example:
        >>> model = SingleTaskGP(train_X, train_Y)
        >>> TS = PathwiseThompsonSampling(model)
    """

    def __init__(
        self,
        model: Model,
        objective: MCAcquisitionObjective | None = None,
        posterior_transform: PosteriorTransform | None = None,
    ) -> None:
        r"""Set up single-outcome pathwise Thompson sampling.

        For a multi-output ``model``, either an ``objective`` or a scalarizing
        ``posterior_transform`` must be supplied so that the multi-output posterior
        samples are reduced to single-output ones.

        Args:
            model: A fitted GP model.
            objective: The MCAcquisitionObjective under which the samples are
                evaluated. Defaults to ``IdentityMCObjective()``.
            posterior_transform: An optional PosteriorTransform.

        Raises:
            UnsupportedError: If ``model`` has more than one output, no
                ``objective`` is given, and ``posterior_transform`` is either
                missing or does not scalarize.
        """
        super().__init__(model=model)
        # Sampling state; all three start out empty and are filled in later
        # (see ``redraw`` and ``_pathwise_forward``).
        self.batch_size: int | None = None
        self.samples: GenericDeterministicModel | None = None
        self.ensemble_indices: Tensor | None = None

        # NOTE: This validation mirrors the one in MCAcquisitionFunction; inheriting
        # from it (and thereby also getting e.g. the X_pending logic) should be
        # considered.
        if objective is None:
            if model.num_outputs != 1:
                if posterior_transform is None:
                    raise UnsupportedError(
                        "Must specify an objective or a posterior transform when using "
                        "a multi-output model."
                    )
                if not posterior_transform.scalarize:
                    raise UnsupportedError(
                        "If using a multi-output model without an objective, "
                        "posterior_transform must scalarize the output."
                    )
            objective = IdentityMCObjective()

        self.objective = objective
        self.posterior_transform = posterior_transform


[docs]
    def redraw(self, batch_size: int) -> None:
        r"""Draw a fresh set of posterior sample paths.

        Replaces ``self.samples`` with a path model obtained from
        ``get_matheron_path_model`` using ``sample_shape = (batch_size,)`` and, for
        ensemble models, re-draws the cached ``self.ensemble_indices``.
        ``self.batch_size`` is updated as well, so that the batch-size consistency
        check performed during evaluation always refers to the paths that are
        currently cached.

        Args:
            batch_size: The number of sample paths to draw, i.e. the q-batch size
                that subsequent evaluations are expected to use.

        Raises:
            NotImplementedError: If the model is an ensemble whose batch shape has
                more than one dimension.
        """
        sample_shape = (batch_size,)
        self.samples = get_matheron_path_model(
            model=self.model, sample_shape=torch.Size(sample_shape)
        )
        # Record the size the paths were drawn for. Without this, calling
        # ``redraw`` directly would leave ``self.batch_size`` unset (or stale) and
        # out of sync with ``self.samples``.
        self.batch_size = batch_size
        if is_ensemble(self.model):
            # the ensembling dimension is assumed to be part of the batch shape
            model_batch_shape = self.model.batch_shape
            if len(model_batch_shape) > 1:
                raise NotImplementedError(
                    "Ensemble models with more than one ensemble dimension are not "
                    "yet supported."
                )
            num_ensemble = model_batch_shape[0]
            # ensemble_indices is cached here to ensure that the acquisition function
            # becomes deterministic for the same input and can be optimized with LBFGS.
            # ensemble_indices is used in select_from_ensemble_models.
            self.ensemble_indices = torch.randint(
                0,
                num_ensemble,
                (*sample_shape, 1, self.model.num_outputs),
            )



[docs]
    @t_batch_mode_transform()
    def forward(self, X: Tensor) -> Tensor:
        r"""Score the candidate set X under the cached posterior sample draws.

        Args:
            X: A ``batch_shape x q x d``-dim batched tensor of ``d``-dim design points.

        Returns:
            A ``batch_shape``-dim tensor of evaluations on the posterior sample draws,
            obtained by summing the per-point values over the q-batch dimension.
        """
        # NOTE: Reducing with a sum over q means that the quantity being optimized
        # is the sum of independent Thompson samples. A possible future alternative
        # is *batched* L-BFGS optimization without the sum, which would guarantee
        # descent steps for every member of the batch via batch-member-specific
        # learning rate selection.
        per_point_values = self._pathwise_forward(X)  # batch_shape x q
        return per_point_values.sum(dim=-1)  # batch_shape


    def _pathwise_forward(self, X: Tensor) -> Tensor:
        """Evaluate the pathwise posterior sample draws on the candidate set X.

        On the first call (while ``self.samples`` is still ``None``) the q-batch
        size of ``X`` is cached and the sample paths are drawn via ``redraw``.
        Every later call must use the same q-batch size.

        Args:
            X: A ``batch_shape x q x d``-dim batched tensor of ``d``-dim design points.

        Returns:
            A ``batch_shape x q``-dim tensor of evaluations on the posterior
            sample draws.

        Raises:
            ValueError: If the q-batch size of ``X`` differs from the cached
                ``self.batch_size``.
        """
        batch_size = X.shape[-2]
        if self.samples is None:
            self.batch_size = batch_size
            self.redraw(batch_size=batch_size)

        if self.batch_size != batch_size:
            raise ValueError(
                BATCH_SIZE_CHANGE_ERROR.format(self.batch_size, batch_size)
            )

        # batch_shape x q x 1 x d. A separate name is used so that the original
        # ``batch_shape x q x d`` input stays available for the posterior
        # transform below.
        X_paths = X.unsqueeze(-2)
        # batch_shape x q [x num_ensembles] x 1 x m
        posterior_values = self.samples(X_paths)
        # batch_shape x q [x num_ensembles] x m
        posterior_values = posterior_values.squeeze(-2)

        # batch_shape x q x m
        posterior_values = self.select_from_ensemble_models(values=posterior_values)

        # Compare against ``None`` explicitly instead of relying on the
        # truthiness of the transform object.
        if self.posterior_transform is not None:
            # ``Y`` is ``batch_shape x q x m``, so pass the matching
            # ``batch_shape x q x d`` inputs rather than the unsqueezed tensor.
            posterior_values = self.posterior_transform.evaluate(
                Y=posterior_values, X=X
            )
        # objective removes the ``m`` dimension
        objective_values = self.objective(posterior_values)  # batch_shape x q
        return objective_values


[docs]
    def select_from_ensemble_models(self, values: Tensor):
        """Pick the value of a single ensemble member for each sample path.

        If ``self.model`` is not an ensemble (as determined by ``is_ensemble``),
        ``values`` is returned unchanged. Otherwise, the cached
        ``self.ensemble_indices`` are used to gather one entry along the ensemble
        dimension (the second to last one), and that dimension is then squeezed
        out. The indices are cached rather than re-drawn on every call, which
        keeps the evaluation of the acquisition function deterministic for the
        same input.

        NOTE: As a side effect, ``self.ensemble_indices`` is moved to the device
        of ``values``.

        Args:
            values: A ``batch_shape x q [x num_ensemble] x m``-dim Tensor, where
                ``q`` is the number of sample paths.

        Returns:
            A ``batch_shape x q x m``-dim Tensor in which each element contains a
            single sample from the ensemble, selected with
            ``self.ensemble_indices``.
        """
        if is_ensemble(self.model):
            # Keep the cached indices (shape: num_paths x 1 x m) on the same
            # device as the values they index into.
            self.ensemble_indices = self.ensemble_indices.to(device=values.device)
            cached_indices = self.ensemble_indices
            # Everything in front of the trailing (q, num_ensemble, m) dimensions.
            leading_shape = values.shape[:-3]
            gather_index = cached_indices.expand(
                *leading_shape, *cached_indices.shape
            )
            # values is batch_shape x q x num_ensemble x m; gather along the
            # ensemble dimension and then drop it.
            selected = torch.gather(values, dim=-2, index=gather_index)
            return selected.squeeze(-2)
        return values