Source code for botorch.models.heterogeneous_mtgp

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# pyre-strict

r"""
Multi-Task GP model designed to operate on tasks from different search spaces.

References:

.. [Deshwal2024Heterogeneous]
    A. Deshwal, S. Cakmak, Y. Xia, and D. Eriksson.
    Sample-Efficient Bayesian Optimization with Transfer Learning for
    Heterogeneous Search Spaces. AutoML Conference, 2024.
"""

from typing import Any

import torch
from botorch.acquisition.objective import PosteriorTransform
from botorch.exceptions.errors import UnsupportedError
from botorch.models.kernels.heterogeneous_multitask import MultiTaskConditionalKernel
from botorch.models.multitask import MultiTaskGP
from botorch.models.transforms.input import InputTransform
from botorch.models.transforms.outcome import OutcomeTransform
from botorch.posteriors.gpytorch import GPyTorchPosterior
from botorch.posteriors.transformed import TransformedPosterior
from botorch.utils.datasets import MultiTaskDataset
from torch import Tensor


class HeterogeneousMTGP(MultiTaskGP):
    """A multi-task GP model designed to operate on tasks from different
    search spaces. This model uses ``MultiTaskConditionalKernel``.

    This model was introduced in [Deshwal2024Heterogeneous]_.

    * The model is designed to work with a ``MultiTaskDataset`` that contains
      datasets with different features.
    * It uses a helper to embed the ``X`` coming from the sub-spaces into the
      full-feature space (+ task feature) before passing them down to the
      base ``MultiTaskGP``.
    * The same helper is used in the ``posterior`` method to embed the ``X``
      from the target task into the full dimensional space before evaluating
      the ``posterior`` method of the base class.
    * This model also overwrites the ``_split_inputs`` method. Instead of
      ``x_basic``, we return the ``X`` with task feature included since this
      is used by the ``MultiTaskConditionalKernel`` to identify the active
      dimensions of / the kernels to evaluate for the given input.
    """

    def __init__(
        self,
        train_Xs: list[Tensor],
        train_Ys: list[Tensor],
        train_Yvars: list[Tensor] | None,
        feature_indices: list[list[int]],
        full_feature_dim: int,
        rank: int | None = None,
        use_saas_prior: bool = True,
        use_combinatorial_kernel: bool = True,
        all_tasks: list[int] | None = None,
        input_transform: InputTransform | None = None,
        outcome_transform: OutcomeTransform | None = None,
        validate_task_values: bool = True,
    ) -> None:
        """Construct a heterogeneous multi-task GP model from lists of inputs
        corresponding to each task.

        NOTE: This model assumes that the task 0 is the output / target task.
        It will only produce predictions for task 0.

        Args:
            train_Xs: A list of tensors of shape ``(n_i x d_i)`` where ``d_i``
                is the dimensionality of the input features for task i.
                NOTE: These should not include the task feature!
            train_Ys: A list of tensors of shape ``(n_i x 1)`` containing the
                observations for the corresponding task.
            train_Yvars: An optional list of tensors of shape ``(n_i x 1)``
                containing the observation variances for the corresponding
                task.
            feature_indices: A list of lists of integers specifying the
                indices mapping the features from a given task to the full
                tensor of features. The ``i``th element of the list should
                contain ``d_i`` integers.
            full_feature_dim: The total number of features across all tasks.
                This does not include the task feature dimension.
            rank: The rank of the cross-task covariance matrix.
            use_saas_prior: Whether to use the SAAS prior for base kernels of
                the ``MultiTaskConditionalKernel``.
            use_combinatorial_kernel: Whether to use a combinatorial kernel
                over the binary embedding of task features in
                ``MultiTaskConditionalKernel``.
            all_tasks: By default, multi-task GPs infer the list of all tasks
                from the task features in ``train_X``. This is an experimental
                feature that enables creation of multi-task GPs with tasks
                that don't appear in the training data. Note that when a task
                is not observed, the corresponding task covariance will
                heavily depend on random initialization and may behave
                unexpectedly.
            input_transform: An input transform that is applied in the model's
                forward pass. The transform should be compatible with the
                inputs from the full feature space with the task feature
                appended.
            outcome_transform: An outcome transform that is applied to the
                training data during instantiation and to the posterior during
                inference (that is, the ``Posterior`` obtained by calling
                ``.posterior`` on the model will be on the original scale).
            validate_task_values: If True, validate that the task values
                supplied in the input are expected tasks values. If false,
                unexpected task values will be mapped to the first output_task
                if supplied.
        """
        # These plain attributes are needed by ``map_to_full_tensor`` below,
        # which runs before ``super().__init__``.
        self.full_feature_dim = full_feature_dim
        self.feature_indices = feature_indices
        imputation_values = self._compute_imputation_values(
            train_Xs=train_Xs,
            feature_indices=feature_indices,
            full_feature_dim=full_feature_dim,
        )
        # The first time we map to full tensor, we have to pass in the
        # imputation values as they have not yet been registered as buffers -
        # this has to wait until after super().__init__.
        full_X = torch.cat(
            [
                self.map_to_full_tensor(
                    X=X, task_index=i, imputation_values=imputation_values
                )
                for i, X in enumerate(train_Xs)
            ]
        )
        full_Y = torch.cat(train_Ys)
        full_Yvar = None if train_Yvars is None else torch.cat(train_Yvars)
        covar_module = MultiTaskConditionalKernel(
            feature_indices=feature_indices,
            use_saas_prior=use_saas_prior,
            use_combinatorial_kernel=use_combinatorial_kernel,
        )
        # The features that are forward passed through the kernel should
        # include the task dim.
        covar_module.active_dims = torch.arange(full_feature_dim + 1)
        super().__init__(
            train_X=full_X,
            train_Y=full_Y,
            task_feature=-1,
            train_Yvar=full_Yvar,
            mean_module=None,
            covar_module=covar_module,
            likelihood=None,  # Constructed in MultiTaskGP.
            output_tasks=[0],
            rank=rank,
            all_tasks=all_tasks,
            input_transform=input_transform,
            outcome_transform=outcome_transform,
            validate_task_values=validate_task_values,
        )
        self.register_buffer("feature_imputation_values", imputation_values)

    @classmethod
    def get_all_tasks(
        cls,
        train_X: Tensor,
        task_feature: int,
        output_tasks: list[int] | None = None,
    ) -> tuple[list[int], int, int]:
        """Infer the tasks via the base class and make sure task 0 is listed.

        Delegates to the parent ``get_all_tasks`` and, if task ``0`` (the
        target task) is missing from the inferred tasks, prepends it.

        Args:
            train_X: The training inputs, forwarded to the parent method.
            task_feature: The task feature index, forwarded to the parent
                method.
            output_tasks: Optional output tasks, forwarded to the parent
                method.

        Returns:
            A 3-tuple of the list of all tasks (guaranteed to contain ``0``),
            and the task feature and number of non-task features exactly as
            returned by the parent method.
        """
        (
            all_tasks_inferred,
            task_feature,
            num_non_task_features,
        ) = super().get_all_tasks(
            train_X=train_X, task_feature=task_feature, output_tasks=output_tasks
        )
        if 0 not in all_tasks_inferred:
            all_tasks_inferred = [0] + all_tasks_inferred
        return all_tasks_inferred, task_feature, num_non_task_features

    def map_to_full_tensor(
        self,
        X: Tensor,
        task_index: int,
        imputation_values: Tensor | None = None,
    ) -> Tensor:
        """Map a tensor of task-specific features to the full tensor of
        features, utilizing the feature indices to map each feature to its
        corresponding position in the full tensor. Also append the task index
        as the last column.

        The columns of the full tensor that are not used by the given task are
        filled with the per-dimension empirical mean computed across all tasks
        that contain that dimension (see ``_compute_imputation_values``). This
        avoids out-of-domain padding values that would otherwise be squashed
        by an input transform with fixed bounds (e.g. ``Normalize``).

        Args:
            X: A tensor of shape ``(n x d_i)`` where ``d_i`` is the number of
                features in the original task dataset.
            task_index: The index of the task whose features are being mapped.
            imputation_values: Optional pre-computed imputation values. If not
                provided, uses ``self.feature_imputation_values``.

        Returns:
            A tensor of shape ``(n x (self.full_feature_dim + 1))`` containing
            the mapped features.

        Example:
            >>> # Suppose full feature dim is 3, the feature indices for
            >>> # task 5 are [2, 0], and the empirical mean for missing
            >>> # dim 1 is 7.0.
            >>> X = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
            >>> X_full = self.map_to_full_tensor(X=X, task_index=5)
            >>> # X_full = torch.tensor(
            >>> #     [[2.0, 7.0, 1.0, 5.0], [4.0, 7.0, 3.0, 5.0]]
            >>> # )
        """
        if imputation_values is None:
            imputation_values = self.feature_imputation_values
        X_full = torch.zeros(
            *X.shape[:-1], self.full_feature_dim + 1, dtype=X.dtype, device=X.device
        )
        # Order matters: fill every feature column with its imputation value
        # first, then overwrite the columns this task actually observes.
        X_full[..., : self.full_feature_dim] = imputation_values
        X_full[..., self.feature_indices[task_index]] = X
        X_full[..., -1] = task_index
        return X_full

    @staticmethod
    def _compute_imputation_values(
        train_Xs: list[Tensor],
        feature_indices: list[list[int]],
        full_feature_dim: int,
    ) -> Tensor:
        """Compute per-dimension empirical mean across all tasks that contain
        each dimension of the joint feature space.

        For each dimension ``d`` in ``[0, full_feature_dim)``, collects the
        values from every task's ``train_X`` column that maps to ``d`` and
        takes the mean. These values are used by ``map_to_full_tensor`` to
        impute missing dims when embedding a per-task ``X`` into the full
        feature space.

        Args:
            train_Xs: Per-task training inputs (without the task feature).
                The dtype and device of the first tensor determine those of
                the result.
            feature_indices: For each task, the positions of its columns in
                the full feature space.
            full_feature_dim: The number of dimensions of the full feature
                space (excluding the task feature).

        Returns:
            A tensor of shape ``(full_feature_dim,)`` with the per-dim mean.
            If a dimension is not present in any task (which should not occur
            under the constructor's invariants), the value defaults to 0.
        """
        dtype = train_Xs[0].dtype
        device = train_Xs[0].device
        imputation = torch.zeros(full_feature_dim, dtype=dtype, device=device)
        for d in range(full_feature_dim):
            values: list[Tensor] = []
            for indices, X in zip(feature_indices, train_Xs):
                # Empty tensors are skipped so they do not contribute to
                # (or break) the mean.
                if d in indices and X.numel() > 0:
                    values.append(X[..., indices.index(d)].reshape(-1))
            if values:
                imputation[d] = torch.cat(values).mean()
        return imputation

    def posterior(
        self,
        X: Tensor,
        output_indices: list[int] | None = None,
        observation_noise: bool | Tensor = False,
        posterior_transform: PosteriorTransform | None = None,
        **kwargs: Any,
    ) -> GPyTorchPosterior | TransformedPosterior:
        r"""Computes the posterior for the target task at the provided points.

        Args:
            X: A tensor of shape ``batch_shape x q x (d_0 + 1)``, where
                ``d_0`` is the dimension of the feature space for task 0 and
                the last column is the task indicator (must be 0 for the
                target task).
            output_indices: Not supported. Must be ``None`` or ``[0]``.
            observation_noise: If True, add observation noise from the
                respective likelihoods. If a Tensor, specifies the observation
                noise levels to add.
            posterior_transform: An optional PosteriorTransform.

        Returns:
            A ``GPyTorchPosterior`` object, representing ``batch_shape`` joint
            distributions over ``q`` points.

        Raises:
            UnsupportedError: If ``output_indices`` is neither ``None`` nor
                ``[0]``, or if any entry of the task column of ``X`` is
                non-zero.
            ValueError: If the last dimension of ``X`` is not ``d_0 + 1``.
        """
        if output_indices is not None and output_indices != [0]:
            raise UnsupportedError(
                "Heterogeneous MTGP does not support `output_indices`. "
            )
        d_target = len(self.feature_indices[0])
        if X.shape[-1] != d_target + 1:
            raise ValueError(
                f"Expected X with {d_target + 1} columns "
                f"({d_target} target features + 1 task column), "
                f"got {X.shape[-1]}."
            )
        if (X[..., -1] != 0).any():
            raise UnsupportedError("Posterior can only be called for the target task.")
        # Drop the task column; ``map_to_full_tensor`` re-appends it after
        # embedding the target-task features into the full feature space.
        X = X[..., :-1]
        X_full = self.map_to_full_tensor(X=X, task_index=0)
        return super().posterior(
            X=X_full,
            observation_noise=observation_noise,
            posterior_transform=posterior_transform,
            **kwargs,
        )

    def _split_inputs(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor]:
        r"""Returns x itself along with a tensor containing the task indices
        only.

        NOTE: This differs from the base class implementation because it
        returns the full tensor in place of ``x_basic``. This is because the
        multi-task conditional kernel utilized the task feature for
        conditioning.

        Args:
            x: The full input tensor with trailing dimension of size
                ``self.full_feature_dim + 1`` (the full feature space plus
                the task feature).

        Returns:
            3-element tuple containing

            - The original tensor ``x``.
            - A tensor of long data type containing the task indices.
            - A tensor with d=0.

            split_inputs by default returns X_before_index, task_indices,
            X_after_index, and so thus has to return a 3-tuple.
        """
        # NOTE(review): this slice presumably relies on ``self._task_feature``
        # being a non-negative index set by the base class (a ``-1 : 0`` slice
        # would be empty) - confirm against ``MultiTaskGP``.
        task_idcs = x[..., self._task_feature : self._task_feature + 1].to(
            dtype=torch.long
        )
        # Empty placeholder for "features after the task column", created
        # directly with the dtype and device of ``x``.
        return x, task_idcs, x.new_zeros((*x.shape[:-1], 0))

    @classmethod
    # pyre-ignore [14] Inconsistent override is expected.
    def construct_inputs(
        cls,
        training_data: MultiTaskDataset,
        task_feature: int = -1,
        output_tasks: list[int] | None = None,
        rank: int | None = None,
        use_saas_prior: bool = True,
        use_combinatorial_kernel: bool = True,
        map_heterogeneous_to_full: bool = False,
    ) -> dict[str, Any]:
        r"""Construct ``Model`` keyword arguments from a given
        ``MultiTaskDataset``.

        Args:
            training_data: A ``MultiTaskDataset``.
            task_feature: Column index of embedded task indicator features.
                Only supported value is ``-1``.
            output_tasks: A list of task indices for which to compute model
                outputs for. Only supported value is ``[0]``.
            rank: The rank of the cross-task covariance matrix.
            use_saas_prior: Whether to use the SAAS prior for base kernels of
                the ``MultiTaskConditionalKernel``.
            use_combinatorial_kernel: Whether to use a combinatorial kernel
                over the binary embedding of task features in
                ``MultiTaskConditionalKernel``.
            map_heterogeneous_to_full: Accepted for compatibility with
                ``MultiTaskGP.construct_inputs`` but unused.
                ``HeterogeneousMTGP`` handles heterogeneous features via
                ``MultiTaskConditionalKernel``.

        Returns:
            A dictionary of keyword arguments for the constructor of this
            class, with one entry of ``train_Xs`` / ``train_Ys`` (and
            ``train_Yvars``, if available) per dataset.

        Raises:
            NotImplementedError: If ``training_data.task_feature_index`` or
                ``task_feature`` is not ``-1``, or if ``output_tasks`` is
                neither ``None`` nor ``[0]``.
        """
        if training_data.task_feature_index != -1:
            raise NotImplementedError(
                "Heterogeneous MTGP requires `task_feature_index` to be -1."
            )
        if task_feature != -1:
            raise NotImplementedError("Heterogeneous MTGP requires `task_feature=-1`.")
        if output_tasks is not None and output_tasks != [0]:
            raise NotImplementedError(
                "Heterogeneous MTGP currently only supports output_tasks=[0]. "
                "The target task will be given the task value of 0."
            )
        all_datasets, feature_indices, full_feature_dim = (
            training_data.get_heterogeneous_feature_mapping()
        )
        # The task feature is the last column (checked above); strip it since
        # the constructor expects inputs without the task feature.
        Xs = [ds.X[..., :-1] for ds in all_datasets]
        Ys = [ds.Y for ds in all_datasets]
        # Whether observation variances are used is decided by the first
        # dataset only.
        Yvars = (
            None if all_datasets[0].Yvar is None else [ds.Yvar for ds in all_datasets]
        )
        all_tasks = list(range(len(all_datasets)))
        return {
            "train_Xs": Xs,
            "train_Ys": Ys,
            "train_Yvars": Yvars,
            "feature_indices": feature_indices,
            "full_feature_dim": full_feature_dim,
            "rank": rank,
            "use_saas_prior": use_saas_prior,
            "use_combinatorial_kernel": use_combinatorial_kernel,
            "all_tasks": all_tasks,
        }