Source code for mpcrl.optim.gradient_free_optimizer

from abc import ABC, abstractmethod
from typing import Optional, Union

import numpy as np
import numpy.typing as npt

from .base_optimizer import BaseOptimizer



[docs]
class GradientFreeOptimizer(BaseOptimizer, ABC):
    """Base class for gradient-free optimization algorithms, e.g., Bayesian
    Optimization.

    This optimizer adopts the ask-tell interface, i.e., subclasses must implement the
    :meth:`GradientFreeOptimizer.ask` and :meth:`GradientFreeOptimizer.tell` methods.
    The former allows the agent to ask the optimizer for a new set of parameters to
    evaluate, while the latter allows the agent to tell the optimizer the values of
    the objective function(s) for the set of parameters it asked for.
    """

    prefers_dict: bool
    """A flag that specifies whether the optimizer prefers to receive the learnable
    parameters as a dictionary of names and values or as a single concatenated array."""

    @abstractmethod
    def ask(
        self,
    ) -> tuple[
        Union[dict[str, npt.ArrayLike], npt.ArrayLike],
        Optional[str],
    ]:
        """Asks the optimizer for a new set of parameters to evaluate.

        Returns
        -------
        dict of (str, 1d arrays) or a single 1d array
            A dictionary of learnable parameter names and their corresponding values.
            Or a single array that results from the concatenation of the parameter
            values.
        str, optional (default=None)
            A string that specifies the status of the optimizer. This is useful to
            communicate to the learning agent whether the optimization algorithm has
            encountered, e.g., some error or failure.
        """

    @abstractmethod
    def tell(
        self,
        values: Union[dict[str, npt.NDArray[np.floating]], npt.NDArray[np.floating]],
        objective: Union[float, npt.NDArray[np.floating]],
    ) -> None:
        """Tells the optimizer the values of the objective function(s) for the set of
        parameters that was asked for.

        Parameters
        ----------
        values : dict of (str, 1d arrays) or a single 1d array
            A dictionary of learnable parameter names and their corresponding values for
            which the objective function(s) was (were) evaluated. Or a single array that
            results from the concatenation of the parameter values. This depends on the
            optimizer's :attr:`prefers_dict` class attribute.
        objective : float or array
            Value(s) of the objective function(s) for the set of parameters. Can be
            single-objective or multi-objective.
        """