Source code for botorch.models.gpytorch

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

r"""
Abstract model class for all GPyTorch-based botorch models.

To implement your own, simply inherit from both the provided classes and a
GPyTorch Model class such as an ExactGP.
"""

from __future__ import annotations

import itertools
import warnings
from abc import ABC
from collections.abc import Mapping
from copy import deepcopy
from typing import Any, TYPE_CHECKING

import torch
from botorch.acquisition.objective import PosteriorTransform
from botorch.exceptions.errors import (
    BotorchTensorDimensionError,
    InputDataError,
    UnsupportedError,
)
from botorch.exceptions.warnings import (
    _get_single_precision_warning,
    BotorchTensorDimensionWarning,
    BotorchWarning,
    InputDataWarning,
)
from botorch.models.model import Model, ModelList
from botorch.models.utils import (
    _make_X_full,
    add_output_dim,
    extract_targets_and_noise_single_output,
    gpt_posterior_settings,
    mod_batch_shape,
    multioutput_to_batch_mode_transform,
    restore_targets_and_noise_single_output,
)
from botorch.models.utils.assorted import fantasize as fantasize_flag
from botorch.posteriors.fully_bayesian import GaussianMixturePosterior
from botorch.posteriors.gpytorch import GPyTorchPosterior
from botorch.utils.multitask import separate_mtmvn
from botorch.utils.transforms import is_ensemble
from gpytorch.distributions import MultitaskMultivariateNormal, MultivariateNormal
from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood
from linear_operator.operators import BlockDiagLinearOperator, CatLinearOperator
from torch import broadcast_shapes, Tensor

if TYPE_CHECKING:
    from botorch.posteriors.posterior_list import PosteriorList  # pragma: no cover
    from botorch.posteriors.transformed import TransformedPosterior  # pragma: no cover
    from gpytorch.likelihoods import Likelihood  # pragma: no cover


class GPyTorchModel(Model, ABC):
    r"""Abstract base class for models based on GPyTorch models.

    The easiest way to use this is to subclass a model from a GPyTorch model
    class (e.g. an ``ExactGP``) and this ``GPyTorchModel``. See e.g.
    ``SingleTaskGP``.
    """

    likelihood: Likelihood

    @staticmethod
    def _validate_tensor_args(
        X: Tensor, Y: Tensor, Yvar: Tensor | None = None, strict: bool = True
    ) -> None:
        r"""Checks that ``Y`` and ``Yvar`` have an explicit output dimension if strict.
        Checks that the dtypes of the inputs match, and warns if the inputs are
        not in double precision (``torch.float64``).

        This also checks that ``Yvar`` has the same trailing dimensions as ``Y``. Note
        we only infer that an explicit output dimension exists when ``X`` and ``Y``
        have the same ``batch_shape``.

        Args:
            X: A ``batch_shape x n x d``-dim Tensor, where ``d`` is the dimension of
                the feature space, ``n`` is the number of points per batch, and
                ``batch_shape`` is the batch shape (potentially empty).
            Y: A ``batch_shape' x n x m``-dim Tensor, where ``m`` is the number of
                model outputs, ``n`` is the number of points per batch, and
                ``batch_shape'`` is the batch shape of the observations.
            Yvar: A ``batch_shape' x n x m`` tensor of observed measurement noise.
                Note: this will be None when using a model that infers the noise
                level (e.g. a ``SingleTaskGP``).
            strict: A boolean indicating whether to check that ``Y`` and ``Yvar``
                have an explicit output dimension.

        Raises:
            BotorchTensorDimensionError: If ``strict`` and ``X`` and ``Y`` differ in
                their number of dimensions, or if the shape of ``Yvar`` does not
                match the trailing dimensions of ``Y``.
            InputDataError: If the dtypes of ``X``, ``Y`` and ``Yvar`` differ.
        """
        if X.dim() != Y.dim():
            if (X.dim() - Y.dim() == 1) and (X.shape[:-1] == Y.shape):
                message = (
                    "An explicit output dimension is required for targets."
                    f" Expected Y with dimension {X.dim()} (got {Y.dim()=})."
                )
            else:
                message = (
                    "Expected X and Y to have the same number of dimensions"
                    f" (got X with dimension {X.dim()} and Y with dimension"
                    f" {Y.dim()})."
                )
            if strict:
                raise BotorchTensorDimensionError(message)
            warnings.warn(
                "Non-strict enforcement of botorch tensor conventions. The "
                "following error would have been raised with strict enforcement: "
                f"{message}",
                BotorchTensorDimensionWarning,
                stacklevel=2,
            )
        # Yvar may not have the same batch dimensions, but the trailing dimensions
        # of Yvar should be the same as the trailing dimensions of Y.
        if Yvar is not None and Y.shape[-(Yvar.dim()) :] != Yvar.shape:
            raise BotorchTensorDimensionError(
                "An explicit output dimension is required for observation noise."
                f" Expected Yvar with shape: {Y.shape[-Yvar.dim() :]} (got"
                f" {Yvar.shape})."
            )
        # Check the dtypes.
        if X.dtype != Y.dtype or (Yvar is not None and Y.dtype != Yvar.dtype):
            raise InputDataError(
                "Expected all inputs to share the same dtype. Got "
                f"{X.dtype} for X, {Y.dtype} for Y, and "
                f"{Yvar.dtype if Yvar is not None else None} for Yvar."
            )
        if X.dtype != torch.float64:
            warnings.warn(
                _get_single_precision_warning(str(X.dtype)),
                InputDataWarning,
                stacklevel=3,  # Warn at model constructor call.
            )

    @property
    def batch_shape(self) -> torch.Size:
        r"""The batch shape of the model.

        This is a batch shape from an I/O perspective, independent of the internal
        representation of the model (as e.g. in BatchedMultiOutputGPyTorchModel).
        For a model with ``m`` outputs, a ``test_batch_shape x q x d``-shaped input
        ``X`` to the ``posterior`` method returns a Posterior object over an output
        of shape ``broadcast(test_batch_shape, model.batch_shape) x q x m``.
        """
        return self.train_inputs[0].shape[:-2]

    @property
    def num_outputs(self) -> int:
        r"""The number of outputs of the model."""
        return self._num_outputs

    # pyre-fixme[14]: Inconsistent override.
    # ``botorch.models.gpytorch.GPyTorchModel.posterior`` overrides method defined
    # in ``Model`` inconsistently. Could not find parameter ``output_indices`` in
    # overriding signature.
    def posterior(
        self,
        X: Tensor,
        observation_noise: bool | Tensor = False,
        posterior_transform: PosteriorTransform | None = None,
        **kwargs: Any,
    ) -> GPyTorchPosterior | TransformedPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A ``(batch_shape) x q x d``-dim Tensor, where ``d`` is the dimension
                of the feature space and ``q`` is the number of points considered
                jointly.
            observation_noise: If True, add the observation noise from the
                likelihood to the posterior. If a Tensor, use it directly as the
                observation noise (must be of shape ``(batch_shape) x q``). It is
                assumed to be in the outcome-transformed space if an outcome
                transform is used.
            posterior_transform: An optional PosteriorTransform.
            kwargs: Accepted for signature compatibility; not used by this
                implementation.

        Returns:
            A ``GPyTorchPosterior`` object, representing a batch of ``b`` joint
            distributions over ``q`` points. Includes observation noise if
            specified.
        """
        self.eval()  # make sure model is in eval mode
        # input transforms are applied at ``posterior`` in ``eval`` mode, and at
        # ``model.forward()`` at the training time
        X = self.transform_inputs(X)
        with gpt_posterior_settings():
            # NOTE: BoTorch's GPyTorchModels also inherit from GPyTorch's ExactGP, thus
            # self(X) calls GPyTorch's ExactGP's __call__, which computes the posterior,
            # rather than e.g. SingleTaskGP's forward, which computes the prior.
            mvn = self(X)
            if observation_noise is not False:
                if isinstance(observation_noise, torch.Tensor):
                    # TODO: Make sure observation noise is transformed correctly
                    self._validate_tensor_args(X=X, Y=observation_noise)
                    if observation_noise.size(-1) == 1:
                        observation_noise = observation_noise.squeeze(-1)
                    mvn = self.likelihood(mvn, X, noise=observation_noise)
                else:
                    mvn = self.likelihood(mvn, X)
        posterior = GPyTorchPosterior(distribution=mvn)
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior, X=X)
        if posterior_transform is not None:
            return posterior_transform(posterior=posterior, X=X)
        return posterior

    def condition_on_observations(
        self, X: Tensor, Y: Tensor, noise: Tensor | None = None, **kwargs: Any
    ) -> Model:
        r"""Condition the model on new observations.

        Args:
            X: A ``batch_shape x n' x d``-dim Tensor, where ``d`` is the dimension of
                the feature space, ``n'`` is the number of points per batch, and
                ``batch_shape`` is the batch shape (must be compatible with the
                batch shape of the model).
            Y: A ``batch_shape' x n' x m``-dim Tensor, where ``m`` is the number of
                model outputs, ``n'`` is the number of points per batch, and
                ``batch_shape'`` is the batch shape of the observations.
                ``batch_shape'`` must be broadcastable to ``batch_shape`` using
                standard broadcasting semantics. If ``Y`` has fewer batch dimensions
                than ``X``, it is assumed that the missing batch dimensions are
                the same for all ``Y``.
            noise: If not ``None``, a tensor of the same shape as ``Y`` representing
                the associated noise variance. It is assumed to already be
                outcome-transformed.
            kwargs: Passed to ``self.get_fantasy_model``.

        Returns:
            A ``Model`` object of the same type, representing the original model
            conditioned on the new observations ``(X, Y)`` (and possibly noise
            observations passed in via kwargs).

        Example:
            >>> train_X = torch.rand(20, 2)
            >>> train_Y = torch.sin(train_X[:, :1]) + torch.cos(train_X[:, 1:])
            >>> model = SingleTaskGP(train_X, train_Y)
            >>> model.eval()
            >>> test_X = torch.rand(10, 2)
            >>> # Need to evaluate once to fill test independent caches
            >>> # so that condition_on_observations works.
            >>> model(test_X)
            >>> new_X = torch.rand(5, 2)
            >>> new_Y = torch.sin(new_X[:, :1]) + torch.cos(new_X[:, 1:])
            >>> model = model.condition_on_observations(X=new_X, Y=new_Y)
        """
        # pass the transformed data to get_fantasy_model below
        # (unless we've already transformed if BatchedMultiOutputGPyTorchModel)
        X_original = X.clone()
        X = self.transform_inputs(X)
        Yvar = noise

        # And do the same for the outcome transform, if it exists.
        if hasattr(self, "outcome_transform") and not isinstance(
            self, BatchedMultiOutputGPyTorchModel
        ):
            # ``noise`` is assumed to already be outcome-transformed.
            Y, _ = self.outcome_transform(Y=Y, Yvar=Yvar, X=X)
        # Validate using strict=False, since we cannot tell if Y has an explicit
        # output dimension. Do not check shapes when fantasizing as they are
        # not expected to match.
        if fantasize_flag.off():
            self._validate_tensor_args(X=X, Y=Y, Yvar=Yvar, strict=False)
        if Y.size(-1) == 1:
            Y = Y.squeeze(-1)
            if Yvar is not None:
                kwargs.update({"noise": Yvar.squeeze(-1)})
        # get_fantasy_model will properly copy any existing outcome transforms
        # (since it deepcopies the original model)
        fantasy_model = self.get_fantasy_model(inputs=X, targets=Y, **kwargs)
        # If we use an input transform, the fantasized data will not get added to
        # the training data by default. We need to manually add it.
        if hasattr(fantasy_model, "input_transform"):
            # Broadcast tensors to compatible shape before concatenating
            expand_shape = torch.broadcast_shapes(
                X_original.shape[:-2], fantasy_model._original_train_inputs.shape[:-2]
            )
            X_expanded = X_original.expand(expand_shape + X_original.shape[-2:])
            orig_expanded = fantasy_model._original_train_inputs.expand(
                expand_shape + fantasy_model._original_train_inputs.shape[-2:]
            )
            fantasy_model._original_train_inputs = torch.cat(
                [orig_expanded, X_expanded],
                dim=-2,
            ).detach()
        return fantasy_model

    def _extract_targets_and_noise(self) -> tuple[Tensor, Tensor | None]:
        r"""Extract targets and noise variance in the correct shape.

        Returns a tuple of (Y, Yvar) where Y and Yvar have shape
        ``batch_shape x n x m``, with batch_shape included only if the
        training data initially contained it. ``Yvar`` is ``None`` for a
        multi-output model whose likelihood is not a
        ``FixedNoiseGaussianLikelihood``.
        """
        if self.num_outputs > 1:
            Y = self.train_targets.transpose(-1, -2)
            Yvar = None
            if isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
                Yvar = self.likelihood.noise_covar.noise.transpose(-1, -2)
        else:
            Y, Yvar = extract_targets_and_noise_single_output(self)
        return Y, Yvar

    def _restore_targets_and_noise(
        self, Y: Tensor, Yvar: Tensor | None, strict: bool
    ) -> None:
        r"""Restore targets and noise variance to the model.

        Args:
            Y: Targets tensor in shape ``batch_shape x n x m``.
            Yvar: Optional noise variance tensor in shape ``batch_shape x n x m``.
            strict: Whether to strictly enforce shape constraints.
        """
        if self.num_outputs > 1:
            Y = Y.transpose(-1, -2)
            if Yvar is not None and isinstance(
                self.likelihood, FixedNoiseGaussianLikelihood
            ):
                Yvar = Yvar.transpose(-1, -2)
                self.likelihood.noise_covar.noise = Yvar
            self.set_train_data(targets=Y, strict=strict)
        else:
            restore_targets_and_noise_single_output(self, Y, Yvar, strict)

    def load_state_dict(
        self,
        state_dict: Mapping[str, Any],
        strict: bool = True,
        keep_transforms: bool = True,
        assign: bool = False,
    ) -> None:
        r"""Load the model state.

        Args:
            state_dict: A dict containing the state of the model.
            strict: A boolean indicating whether to strictly enforce that the keys
                in ``state_dict`` match the keys of this model. It is forwarded to
                the parent ``load_state_dict`` and also used when restoring the
                training targets.
            keep_transforms: A boolean indicating whether to keep the input and
                outcome transforms. Doing so is useful when loading a model that
                was trained on a full set of data, and is later loaded with a
                subset of the data.
            assign: When set to ``False``, the properties of the tensors in the
                current module are preserved whereas setting it to ``True``
                preserves properties of the Tensors in the state dict. The only
                exception is the ``requires_grad`` field of
                :class:`~torch.nn.Parameter` for which the value from the module
                is preserved. Default: ``False``.
        """
        if assign:
            # Match the module to the first entry of the state dict. An empty
            # state dict has nothing to match against, so skip instead of
            # raising ``StopIteration``.
            first_item = next(iter(state_dict.values()), None)
            if first_item is not None:
                self.to(first_item)
        if not keep_transforms:
            super().load_state_dict(state_dict=state_dict, strict=strict, assign=assign)
            return

        should_outcome_transform = (
            hasattr(self, "train_targets")
            and getattr(self, "outcome_transform", None) is not None
        )

        # Recover the untransformed targets with the *current* transform state,
        # so they can be re-transformed with the loaded state further below.
        with torch.no_grad():
            untransformed_Y, untransformed_Yvar = self._extract_targets_and_noise()
            X = self.train_inputs[0]
            if should_outcome_transform:
                try:
                    untransformed_Y, untransformed_Yvar = (
                        self.outcome_transform.untransform(
                            Y=untransformed_Y,
                            Yvar=untransformed_Yvar,
                            X=X,
                        )
                    )
                except NotImplementedError:
                    warnings.warn(
                        "Outcome transform does not support untransforming. "
                        "Cannot load the state dict with transforms preserved. "
                        "Setting keep_transforms=False.",
                        BotorchWarning,
                        stacklevel=3,
                    )
                    super().load_state_dict(
                        state_dict=state_dict, strict=strict, assign=assign
                    )
                    return

        super().load_state_dict(state_dict=state_dict, strict=strict, assign=assign)

        if getattr(self, "input_transform", None) is not None:
            self.input_transform.eval()

        if should_outcome_transform:
            self.outcome_transform.eval()
            retransformed_Y, retransformed_Yvar = self.outcome_transform(
                Y=untransformed_Y, Yvar=untransformed_Yvar, X=X
            )
            self._restore_targets_and_noise(retransformed_Y, retransformed_Yvar, strict)
# pyre-fixme[13]: uninitialized attributes _num_outputs, _input_batch_shape,
# _aug_batch_shape
class BatchedMultiOutputGPyTorchModel(GPyTorchModel):
    r"""Base class for batched multi-output GPyTorch models with independent outputs.

    This model should be used when the same training data is used for all outputs.
    Outputs are modeled independently by using a different batch for each output.
    """

    _num_outputs: int
    _input_batch_shape: torch.Size
    _aug_batch_shape: torch.Size

    @staticmethod
    def get_batch_dimensions(
        train_X: Tensor, train_Y: Tensor
    ) -> tuple[torch.Size, torch.Size]:
        r"""Get the raw batch shape and output-augmented batch shape of the inputs.

        Args:
            train_X: A ``n x d`` or ``batch_shape x n x d`` (batch mode) tensor of
                training features.
            train_Y: A ``n x m`` or ``batch_shape x n x m`` (batch mode) tensor of
                training observations.

        Returns:
            2-element tuple containing

            - The ``input_batch_shape``
            - The output-augmented batch shape: ``input_batch_shape x (m)``; the
              trailing ``m`` is only appended when ``m > 1``.
        """
        input_batch_shape = train_X.shape[:-2]
        aug_batch_shape = input_batch_shape
        num_outputs = train_Y.shape[-1]
        if num_outputs > 1:
            aug_batch_shape += torch.Size([num_outputs])
        return input_batch_shape, aug_batch_shape

    def _set_dimensions(self, train_X: Tensor, train_Y: Tensor) -> None:
        r"""Store the number of outputs and the batch shape.

        Args:
            train_X: A ``n x d`` or ``batch_shape x n x d`` (batch mode) tensor of
                training features.
            train_Y: A ``n x m`` or ``batch_shape x n x m`` (batch mode) tensor of
                training observations.
        """
        self._num_outputs = train_Y.shape[-1]
        self._input_batch_shape, self._aug_batch_shape = self.get_batch_dimensions(
            train_X=train_X, train_Y=train_Y
        )

    @property
    def batch_shape(self) -> torch.Size:
        r"""The batch shape of the model.

        This is a batch shape from an I/O perspective, independent of the internal
        representation of the model (as e.g. in BatchedMultiOutputGPyTorchModel).
        For a model with ``m`` outputs, a ``test_batch_shape x q x d``-shaped input
        ``X`` to the ``posterior`` method returns a Posterior object over an output
        of shape ``broadcast(test_batch_shape, model.batch_shape) x q x m``.
        """
        return self._input_batch_shape

    def _transform_tensor_args(
        self, X: Tensor, Y: Tensor, Yvar: Tensor | None = None
    ) -> tuple[Tensor, Tensor, Tensor | None]:
        r"""Transforms tensor arguments: for single output models, the output
        dimension is squeezed and for multi-output models, the output dimension is
        transformed into the left-most batch dimension.

        Args:
            X: A ``n x d`` or ``batch_shape x n x d`` (batch mode) tensor of training
                features.
            Y: A ``n x m`` or ``batch_shape x n x m`` (batch mode) tensor of
                training observations.
            Yvar: A ``n x m`` or ``batch_shape x n x m`` (batch mode) tensor of
                observed measurement noise. Note: this will be None when using a
                model that infers the noise level (e.g. a ``SingleTaskGP``).

        Returns:
            3-element tuple containing

            - A ``input_batch_shape x (m) x n x d`` tensor of training features.
            - A ``target_batch_shape x (m) x n`` tensor of training observations.
            - A ``target_batch_shape x (m) x n`` tensor observed measurement noise
              (or None).
        """
        if self._num_outputs > 1:
            return multioutput_to_batch_mode_transform(
                train_X=X, train_Y=Y, train_Yvar=Yvar, num_outputs=self._num_outputs
            )
        return X, Y.squeeze(-1), None if Yvar is None else Yvar.squeeze(-1)

    def _apply_noise(
        self,
        X: Tensor,
        mvn: MultivariateNormal,
        observation_noise: bool | Tensor = False,
    ) -> MultivariateNormal:
        """Adds the observation noise to the posterior.

        Args:
            X: A tensor of shape ``batch_shape x q x d``.
            mvn: A ``MultivariateNormal`` object representing the posterior over the
                true latent function.
            observation_noise: If True, add the observation noise from the
                likelihood to the posterior. If a Tensor, use it directly as the
                observation noise (must be of shape ``(batch_shape) x q x m``).

        Returns:
            The posterior predictive. If ``observation_noise`` is ``False``, ``mvn``
            is returned unchanged.
        """
        if observation_noise is False:
            return mvn
        # noise_shape is ``broadcast(test_batch_shape, model.batch_shape) x m x q``
        noise_shape = mvn.batch_shape + mvn.event_shape
        if torch.is_tensor(observation_noise):
            # TODO: Validate noise shape
            # make observation_noise's shape match noise_shape
            if self.num_outputs > 1:
                obs_noise = observation_noise.transpose(-1, -2)
            else:
                obs_noise = observation_noise.squeeze(-1)
            mvn = self.likelihood(
                mvn,
                X,
                noise=obs_noise.expand(noise_shape),
            )
        elif isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
            # Use the mean of the previous noise values (TODO: be smarter here).
            observation_noise = self.likelihood.noise.mean(dim=-1, keepdim=True)
            mvn = self.likelihood(
                mvn,
                X,
                noise=observation_noise.expand(noise_shape),
            )
        else:
            mvn = self.likelihood(mvn, X)
        return mvn

    # pyre-ignore[14]: Inconsistent override. Could not find parameter
    # ``Keywords(typing.Any)`` in overriding signature.
    def posterior(
        self,
        X: Tensor,
        output_indices: list[int] | None = None,
        observation_noise: bool | Tensor = False,
        posterior_transform: PosteriorTransform | None = None,
    ) -> GPyTorchPosterior | TransformedPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A ``(batch_shape) x q x d``-dim Tensor, where ``d`` is the dimension
                of the feature space and ``q`` is the number of points considered
                jointly.
            output_indices: A list of indices, corresponding to the outputs over
                which to compute the posterior (if the model is multi-output).
                Can be used to speed up computation if only a subset of the
                model's outputs are required for optimization. If omitted,
                computes the posterior over all model outputs. Note that this
                argument is not consulted while ``torch.jit`` tracing, where
                all outputs are returned.
            observation_noise: If True, add the observation noise from the
                likelihood to the posterior. If a Tensor, use it directly as the
                observation noise (must be of shape ``(batch_shape) x q x m``).
            posterior_transform: An optional PosteriorTransform.

        Returns:
            A ``GPyTorchPosterior`` object, representing ``batch_shape`` joint
            distributions over ``q`` points and the outputs selected by
            ``output_indices`` each. Includes observation noise if specified.
        """
        self.eval()  # make sure model is in eval mode
        # input transforms are applied at ``posterior`` in ``eval`` mode, and at
        # ``model.forward()`` at the training time
        X = self.transform_inputs(X)
        with gpt_posterior_settings():
            # insert a dimension for the output dimension
            if self._num_outputs > 1:
                X, output_dim_idx = add_output_dim(
                    X=X, original_batch_shape=self._input_batch_shape
                )
            # NOTE: BoTorch's GPyTorchModels also inherit from GPyTorch's ExactGP, thus
            # self(X) calls GPyTorch's ExactGP's __call__, which computes the posterior,
            # rather than e.g. SingleTaskGP's forward, which computes the prior.
            mvn = self(X)
            mvn = self._apply_noise(X=X, mvn=mvn, observation_noise=observation_noise)
            if self._num_outputs > 1:
                if torch.jit.is_tracing():
                    mvn = MultitaskMultivariateNormal.from_batch_mvn(
                        mvn, task_dim=output_dim_idx
                    )
                else:
                    mean_x = mvn.mean
                    covar_x = mvn.lazy_covariance_matrix
                    output_indices = output_indices or range(self._num_outputs)
                    mvns = [
                        MultivariateNormal(
                            mean_x.select(dim=output_dim_idx, index=t),
                            covar_x[(slice(None),) * output_dim_idx + (t,)],
                        )
                        for t in output_indices
                    ]
                    mvn = MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns)

        posterior = GPyTorchPosterior(distribution=mvn)
        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior, X=X)
        if posterior_transform is not None:
            return posterior_transform(posterior=posterior, X=X)
        return posterior

    # pyre-ignore[14]: Inconsistent override. Could not find parameter ``noise``.
    def condition_on_observations(
        self, X: Tensor, Y: Tensor, **kwargs: Any
    ) -> BatchedMultiOutputGPyTorchModel:
        r"""Condition the model on new observations.

        Args:
            X: A ``batch_shape x n' x d``-dim Tensor, where ``d`` is the dimension of
                the feature space, ``n'`` is the number of points per batch, and
                ``batch_shape`` is the batch shape (must be compatible with the
                batch shape of the model).
            Y: A ``batch_shape' x n' x m``-dim Tensor, where ``m`` is the number of
                model outputs, ``n'`` is the number of points per batch, and
                ``batch_shape'`` is the batch shape of the observations.
                ``batch_shape'`` must be broadcastable to ``batch_shape`` using
                standard broadcasting semantics. If ``Y`` has fewer batch dimensions
                than ``X``, it is assumed that the missing batch dimensions are
                the same for all ``Y``.
            kwargs: Forwarded to the parent ``condition_on_observations``. An
                optional ``noise`` entry is read as the observation noise of
                ``Y``; it is assumed to already be outcome-transformed.

        Returns:
            A ``BatchedMultiOutputGPyTorchModel`` object of the same type with
            ``n + n'`` training examples, representing the original model
            conditioned on the new observations ``(X, Y)`` (and possibly noise
            observations passed in via kwargs).

        Example:
            >>> train_X = torch.rand(20, 2)
            >>> train_Y = torch.stack(
            >>>     [torch.sin(train_X[:, 0]), torch.cos(train_X[:, 1])], -1
            >>> )
            >>> model = SingleTaskGP(train_X, train_Y)
            >>> new_X = torch.rand(5, 2)
            >>> new_Y = torch.stack(
            >>>     [torch.sin(new_X[:, 0]), torch.cos(new_X[:, 1])], -1
            >>> )
            >>> model = model.condition_on_observations(X=new_X, Y=new_Y)
        """
        noise = kwargs.get("noise")
        if hasattr(self, "outcome_transform"):
            # We need to apply transforms before shifting batch indices around.
            # ``noise`` is assumed to already be outcome-transformed.
            Y, _ = self.outcome_transform(Y, X=X)
        # Do not check shapes when fantasizing as they are not expected to match.
        if fantasize_flag.off():
            self._validate_tensor_args(X=X, Y=Y, Yvar=noise, strict=False)
        if self._num_outputs > 1:
            inputs, targets, noise = multioutput_to_batch_mode_transform(
                train_X=X, train_Y=Y, num_outputs=self._num_outputs, train_Yvar=noise
            )
            # ``multioutput_to_batch_mode_transform`` removes the output dimension,
            # which is necessary for ``condition_on_observations``
            targets = targets.unsqueeze(-1)
            if noise is not None:
                noise = noise.unsqueeze(-1)
        else:
            inputs = X
            targets = Y
        if noise is not None:
            kwargs.update({"noise": noise})
        fantasy_model = super().condition_on_observations(X=inputs, Y=targets, **kwargs)
        # Single-output targets carry only the trailing ``n`` dimension, while
        # multi-output targets additionally carry the output batch dimension.
        fantasy_model._input_batch_shape = fantasy_model.train_targets.shape[
            : (-1 if self._num_outputs == 1 else -2)
        ]
        if not self._is_fully_bayesian:
            fantasy_model._aug_batch_shape = fantasy_model.train_targets.shape[:-1]
        return fantasy_model

    def subset_output(self, idcs: list[int]) -> BatchedMultiOutputGPyTorchModel:
        r"""Subset the model along the output dimension.

        Args:
            idcs: The output indices to subset the model to.

        Returns:
            A deep copy of the current model, subset to the specified output
            indices.

        Raises:
            NotImplementedError: If the model does not define a
                ``_subset_batch_dict`` attribute.
        """
        try:
            subset_batch_dict = self._subset_batch_dict
        except AttributeError as err:
            raise NotImplementedError(
                "`subset_output` requires the model to define a `_subset_batch_dict` "
                "attribute that lists the indices of the output dimensions in each "
                "model parameter that needs to be subset."
            ) from err

        m = len(idcs)
        new_model = deepcopy(self)

        subset_everything = self.num_outputs == m and idcs == list(range(m))
        if subset_everything:
            return new_model

        tidxr = torch.tensor(idcs, device=new_model.train_targets.device)
        # An integer index drops the output dimension when a single output is kept.
        idxr = tidxr if m > 1 else idcs[0]
        new_tail_bs = torch.Size([m]) if m > 1 else torch.Size()

        new_model._num_outputs = m
        new_model._aug_batch_shape = new_model._aug_batch_shape[:-1] + new_tail_bs
        new_model.train_inputs = tuple(
            ti[..., idxr, :, :] for ti in new_model.train_inputs
        )
        new_model.train_targets = new_model.train_targets[..., idxr, :]

        # adjust batch shapes of parameters/buffers if necessary
        for full_name, p in itertools.chain(
            new_model.named_parameters(), new_model.named_buffers()
        ):
            if full_name in subset_batch_dict:
                idx = subset_batch_dict[full_name]
                new_data = p.index_select(dim=idx, index=tidxr)
                if m == 1:
                    new_data = new_data.squeeze(idx)
                p.data = new_data
                mod_name = full_name.split(".")[:-1]
                mod_batch_shape(new_model, mod_name, m if m > 1 else 0)

        # subset outcome transform if present (best effort: a model without an
        # outcome transform raises AttributeError here and is left as is)
        try:
            subset_octf = new_model.outcome_transform.subset_output(idcs=idcs)
            new_model.outcome_transform = subset_octf
        except AttributeError:
            pass

        # Subset fixed noise likelihood if present.
        if isinstance(self.likelihood, FixedNoiseGaussianLikelihood):
            full_noise = new_model.likelihood.noise_covar.noise
            new_noise = full_noise[..., idcs if len(idcs) > 1 else idcs[0], :]
            new_model.likelihood.noise_covar.noise = new_noise
        return new_model
[docs] class ModelListGPyTorchModel(ModelList, GPyTorchModel, ABC): r"""Abstract base class for models based on multi-output GPyTorch models. This is meant to be used with a gpytorch ModelList wrapper for independent evaluation of submodels. Those submodels can themselves be multi-output models, in which case the task covariances will be ignored. """ @property def batch_shape(self) -> torch.Size: r"""The batch shape of the model. This is a batch shape from an I/O perspective, independent of the internal representation of the model (as e.g. in BatchedMultiOutputGPyTorchModel). For a model with ``m`` outputs, a ``test_batch_shape x q x d``-shaped input ``X`` to the ``posterior`` method returns a Posterior object over an output of shape ``broadcast(test_batch_shape, model.batch_shape) x q x m``. """ batch_shapes = {m.batch_shape for m in self.models} if len(batch_shapes) > 1: msg = ( f"Component models of {self.__class__.__name__} have different " "batch shapes" ) try: broadcast_shape = torch.broadcast_shapes(*batch_shapes) warnings.warn(msg + ". Broadcasting batch shapes.", stacklevel=2) return broadcast_shape except RuntimeError: raise NotImplementedError(msg + " that are not broadcastble.") return next(iter(batch_shapes))
[docs] def load_state_dict( self, state_dict: Mapping[str, Any], strict: bool = True, assign: bool = False, ) -> None: return ModelList.load_state_dict( self, state_dict=state_dict, strict=strict, assign=assign )
# pyre-fixme[14]: Inconsistent override in return types
[docs] def posterior( self, X: Tensor, output_indices: list[int] | None = None, observation_noise: bool | Tensor = False, posterior_transform: PosteriorTransform | None = None, ) -> GPyTorchPosterior | PosteriorList: r"""Computes the posterior over model outputs at the provided points. If any model returns a MultitaskMultivariateNormal posterior, then that will be split into individual MVNs per task, with inter-task covariance ignored. Args: X: A ``b x q x d``-dim Tensor, where ``d`` is the dimension of the feature space, ``q`` is the number of points considered jointly, and ``b`` is the batch dimension. output_indices: A list of indices, corresponding to the outputs over which to compute the posterior (if the model is multi-output). Can be used to speed up computation if only a subset of the model's outputs are required for optimization. If omitted, computes the posterior over all model outputs. observation_noise: If True, add the observation noise from the respective likelihoods to the posterior. If a Tensor of shape ``(batch_shape) x q x m``, use it directly as the observation noise (with ``observation_noise[...,i]`` added to the posterior of the ``i``-th model). posterior_transform: An optional PosteriorTransform. Returns: - If no ``posterior_transform`` is provided and the component models have no ``outcome_transform``, or if the component models only use linear outcome transforms like ``Standardize`` (i.e. not ``Log``), returns a ``GPyTorchPosterior`` or ``GaussianMixturePosterior`` object, representing ``batch_shape`` joint distributions over ``q`` points and the outputs selected by ``output_indices`` each. Includes measurement noise if ``observation_noise`` is specified. 
- If no ``posterior_transform`` is provided and component models have nonlinear transforms like ``Log``, returns a ``PosteriorList`` with sub-posteriors of type ``TransformedPosterior`` - If ``posterior_transform`` is provided, that posterior transform will be applied and will determine the return type. This could be any subclass of ``Posterior``, but common choices give a ``GPyTorchPosterior``. """ # Nonlinear transforms untransform to a ``TransformedPosterior``, # which can't be made into a ``GPyTorchPosterior`` returns_untransformed = any( hasattr(mod, "outcome_transform") and (not mod.outcome_transform._is_linear) for mod in self.models ) # NOTE: We're not passing in the posterior transform here. We'll apply it later. posterior = ModelList.posterior( self, X=X, output_indices=output_indices, observation_noise=observation_noise, ) if not returns_untransformed: mvns = [p.distribution for p in posterior.posteriors] if any(isinstance(m, MultitaskMultivariateNormal) for m in mvns): mvn_list = [] for mvn in mvns: if len(mvn.event_shape) == 2: # We separate MTMVNs into independent-across-task MVNs for # the convenience of using BlockDiagLinearOperator below. # (b x q x m x m) -> list of m (b x q x 1 x 1) mvn_list.extend(separate_mtmvn(mvn)) else: mvn_list.append(mvn) mean = torch.stack([mvn.mean for mvn in mvn_list], dim=-1) covars = CatLinearOperator( *[mvn.lazy_covariance_matrix.unsqueeze(-3) for mvn in mvn_list], dim=-3, ) # List of m (b x q x 1 x 1) -> (b x q x m x 1 x 1) mvn = MultitaskMultivariateNormal( mean=mean, covariance_matrix=BlockDiagLinearOperator(covars, block_dim=-3).to( X ), # (b x q x m x 1 x 1) -> (b x q x m x m) interleaved=False, ) else: mvns = self._broadcast_mvns(mvns=mvns) mvn = ( mvns[0] if len(mvns) == 1 else MultitaskMultivariateNormal.from_independent_mvns(mvns=mvns) ) # Return the result as a GPyTorchPosterior/GaussianMixturePosterior. 
if any(is_ensemble(m) for m in self.models): # Mixing fully Bayesian and other GP models is currently # not supported. posterior = GaussianMixturePosterior(distribution=mvn) else: posterior = GPyTorchPosterior(distribution=mvn) if posterior_transform is not None: return posterior_transform(posterior=posterior, X=X) return posterior
[docs] def condition_on_observations(self, X: Tensor, Y: Tensor, **kwargs: Any) -> Model: raise NotImplementedError()
def _broadcast_mvns(
    self, mvns: list[MultivariateNormal]
) -> list[MultivariateNormal]:
    """Broadcasts the batch shapes of the given MultivariateNormals.

    The MVNs will have a batch shape of ``input_batch_shape x model_batch_shape``.
    If the model batch shapes are broadcastable, we will broadcast the mvns to
    a batch shape of ``input_batch_shape x self.batch_shape``.

    MVNs with fewer batch dimensions than the longest one are first unsqueezed
    (right after the input batch dimensions) until all ranks agree. Every MVN
    is then expanded to the broadcast of all resulting batch shapes. Using the
    broadcast shape, rather than an arbitrary longest shape, keeps the result
    well defined when several MVNs share the maximal rank but differ in
    size-1 dimensions (e.g. ``5 x 1`` and ``5 x 3``).

    NOTE: ``mvns`` is updated in place; the same list object is returned.

    Args:
        mvns: A list of MultivariateNormals.

    Returns:
        A list of MultivariateNormals with broadcasted batch shapes.
    """
    if len({mvn.batch_shape for mvn in mvns}) == 1:
        # All MVNs have the same batch shape. We can return as is.
        return mvns
    # This call will error out if they're not broadcastable.
    # If they're broadcastable, it'll log a warning.
    target_model_shape = self.batch_shape
    max_len = max(len(mvn.batch_shape) for mvn in mvns)
    # Number of leading (input) batch dims; model batch dims come after them.
    input_batch_len = max_len - len(target_model_shape)
    for i, mvn in enumerate(mvns):
        while len(mvn.batch_shape) < max_len:
            # MVN is missing batch dimensions. Unsqueeze as needed.
            mvn = mvn.unsqueeze(input_batch_len)
        mvns[i] = mvn
    # All ranks agree now, so the common target is the plain broadcast.
    target_shape = broadcast_shapes(*(mvn.batch_shape for mvn in mvns))
    for i, mvn in enumerate(mvns):
        if mvn.batch_shape != target_shape:
            # Expand to match the batch shapes.
            mvns[i] = mvn.expand(target_shape)
    return mvns
class MultiTaskGPyTorchModel(GPyTorchModel, ABC):
    r"""Abstract base class for multi-task models based on GPyTorch models.

    Supplies the ``posterior`` method for models implementing a "long-format"
    multi-task GP in the style of ``MultiTaskGP``.
    """

    def _extract_targets_and_noise(self) -> tuple[Tensor, Tensor | None]:
        r"""Extract targets and noise variance for multi-task models.

        Delegates to ``extract_targets_and_noise_single_output``.

        Returns:
            A tuple ``(Y, Yvar)`` where ``Y`` and ``Yvar`` have shape
            ``batch_shape x n x m``; ``batch_shape`` is included only if the
            training data initially contained it.
        """
        return extract_targets_and_noise_single_output(self)

    def _restore_targets_and_noise(
        self, Y: Tensor, Yvar: Tensor | None, strict: bool
    ) -> None:
        r"""Restore targets and noise variance for multi-task models.

        Delegates to ``restore_targets_and_noise_single_output``.

        Args:
            Y: Targets tensor in shape ``batch_shape x n x m``.
            Yvar: Optional noise variance tensor in shape
                ``batch_shape x n x m``.
            strict: Whether to strictly enforce shape constraints.
        """
        restore_targets_and_noise_single_output(self, Y, Yvar, strict)

    def _apply_noise(
        self,
        X: Tensor,
        mvn: MultivariateNormal,
        observation_noise: bool | Tensor,
    ) -> MultivariateNormal:
        """Adds the observation noise to the posterior.

        With a ``FixedNoiseGaussianLikelihood``, the training noise is averaged
        per task and every test point receives the average noise of its own
        task. With any other likelihood, the likelihood is applied to ``mvn``
        as is (for a Gaussian likelihood this currently means one inferred
        noise level shared by all tasks).

        TODO: implement support for task-specific inferred noise levels.

        Args:
            X: A tensor of shape ``batch_shape x q x d + 1``, where ``d`` is the
                dimension of the feature space and the ``+ 1`` dimension is the
                task feature / index.
            mvn: A ``MultivariateNormal`` object representing the posterior over
                the true latent function.
            observation_noise: If True, add observation noise from the
                respective likelihood. If False, ``mvn`` is returned unchanged.
                Tensor input is currently not supported.

        Returns:
            The posterior predictive.

        Raises:
            NotImplementedError: If ``observation_noise`` is a tensor.
        """
        if torch.is_tensor(observation_noise):
            raise NotImplementedError(
                "Passing a tensor of observations is not supported by MultiTaskGP."
            )
        if observation_noise is False:
            return mvn
        likelihood = self.likelihood
        if not isinstance(likelihood, FixedNoiseGaussianLikelihood):
            return likelihood(mvn, X)

        # Task index of every test point, and the distinct tasks among them.
        test_tasks = self._map_tasks(X[..., self._task_feature]).long()
        tasks_present = test_tasks.unique()
        # Task index of every training point.
        train_tasks = self._map_tasks(
            self.train_inputs[0][..., self._task_feature]
        ).long()

        # Mean training noise per task; tasks absent from ``X`` stay at zero.
        per_task_noise = torch.zeros(
            *self.batch_shape, self.num_tasks, dtype=X.dtype, device=X.device
        )
        for task in tasks_present:
            per_task_noise[..., task] = likelihood.noise[
                ..., train_tasks == task
            ].mean(dim=-1)

        # ``full_shape`` is ``broadcast(test_batch_shape, model.batch_shape) x q``.
        test_batch_shape, q_shape = X.shape[:-2], X.shape[-2:-1]
        full_shape = broadcast_shapes(test_batch_shape, self.batch_shape) + q_shape
        # Expand and gather ensures we pick correct noise dimensions for
        # batch evaluations of batched models.
        noise = per_task_noise.expand(*full_shape[:-1], -1).gather(
            dim=-1, index=test_tasks.expand(full_shape)
        )
        return likelihood(mvn, X, noise=noise)

    # pyre-ignore[14]: Inconsistent override. Could not find parameter
    # ``Keywords(typing.Any)`` in overriding signature.
    def posterior(
        self,
        X: Tensor,
        output_indices: list[int] | None = None,
        observation_noise: bool | Tensor = False,
        posterior_transform: PosteriorTransform | None = None,
    ) -> GPyTorchPosterior | TransformedPosterior:
        r"""Computes the posterior over model outputs at the provided points.

        Args:
            X: A tensor of shape ``batch_shape x q x d`` or
                ``batch_shape x q x (d + 1)``, where ``d`` is the dimension of
                the feature space (not including task indices) and ``q`` is the
                number of points considered jointly. The ``+ 1`` dimension is
                the optional task feature / index. If given, the model produces
                the outputs for the given task indices. If omitted, the model
                produces outputs for tasks in ``self._output_tasks`` (specified
                as ``output_tasks`` while constructing the model), which can be
                overwritten using ``output_indices``.
            output_indices: A list of task values over which to compute the
                posterior. Only used if ``X`` does not include the task
                feature. If omitted, defaults to ``self._output_tasks``.
            observation_noise: If True, add observation noise from the
                respective likelihoods. If a Tensor, specifies the observation
                noise levels to add.
            posterior_transform: An optional PosteriorTransform.

        Returns:
            A ``GPyTorchPosterior`` object, representing ``batch_shape`` joint
            distributions over ``q`` points. If the task features are included
            in ``X``, the posterior will be single output. Otherwise, the
            posterior will be single or multi output corresponding to the tasks
            included in either the ``output_indices`` or ``self._output_tasks``.
            If a ``posterior_transform`` is given, its result is returned
            instead.

        Raises:
            ValueError: If ``X`` includes the task feature and
                ``output_indices`` is not None.
        """
        if X.shape[-1] == self.num_non_task_features + 1:
            # ``X`` already carries the task column.
            if output_indices is not None:
                raise ValueError(
                    "`output_indices` must be None when `X` includes task features."
                )
            task_values = X[..., self._task_feature].unique()
            n_outputs = 1
            X_eval = X
        else:
            # Add the task features to construct the full X for evaluation.
            requested = self._output_tasks if output_indices is None else output_indices
            task_values = torch.tensor(requested, dtype=torch.long, device=X.device)
            n_outputs = len(task_values)
            X_eval = _make_X_full(
                X=X, output_indices=task_values.tolist(), tf=self._task_feature
            )
        # Make sure all task feature values are valid. The mapped result is
        # not needed afterwards; the call is kept for its validation.
        self._map_tasks(task_values=task_values)

        self.eval()  # make sure model is in eval mode
        # input transforms are applied at ``posterior`` in ``eval`` mode, and at
        # ``model.forward()`` at the training time
        X_eval = self.transform_inputs(X_eval)
        with gpt_posterior_settings():
            mvn = self._apply_noise(
                X=X_eval,
                mvn=self(X_eval),
                observation_noise=observation_noise,
            )

        if n_outputs != 1:
            # Multiple outputs: make a MultitaskMultivariateNormal out of this.
            flat_mean = mvn.mean
            mvn = MultitaskMultivariateNormal(
                mean=flat_mean.view(*flat_mean.shape[:-1], n_outputs, -1).transpose(
                    -1, -2
                ),
                covariance_matrix=mvn.lazy_covariance_matrix,
                interleaved=False,
            )
        # For a single output, ``mvn`` is wrapped as is.
        posterior = GPyTorchPosterior(distribution=mvn)

        if hasattr(self, "outcome_transform"):
            posterior = self.outcome_transform.untransform_posterior(posterior, X=X)
        if posterior_transform is None:
            return posterior
        return posterior_transform(posterior=posterior, X=X)

    def subset_output(self, idcs: list[int]) -> MultiTaskGPyTorchModel:
        r"""Returns a new model that only outputs a subset of the outputs.

        Not supported by this class; the call always raises.

        Args:
            idcs: A list of output indices, corresponding to the outputs to keep.

        Raises:
            UnsupportedError: Always.
        """
        raise UnsupportedError(
            "Subsetting outputs is not supported by `MultiTaskGPyTorchModel`."
        )