Source code for mpcrl.util.control

"""A collection of basic utility functions for control applications. In particular, it
contains functions for solving the LQR problems in continuous- and discrete-time,
discretization methods such as Runge-Kutta 4, and functions to build Control Barrier
Functions.

Some inspiration was drawn from `MPCtools <https://bitbucket.org/rawlings-group/mpc-tools-casadi/src/master/mpctools/util.py>`_.
"""

from collections.abc import Iterable as _Iterable
from typing import Callable, Optional

import casadi as cs
import numpy as np
import numpy.typing as npt
from scipy.linalg import solve_continuous_are as _solve_continuous_are
from scipy.linalg import solve_discrete_are as _solve_discrete_are

from .math import SymOrNumType, SymType
from .math import dual_norm as _dual_norm
from .math import lie_derivative as _lie_derivative


[docs] def lqr( A: npt.NDArray[np.floating], B: npt.NDArray[np.floating], Q: npt.NDArray[np.floating], R: npt.NDArray[np.floating], M: Optional[npt.NDArray[np.floating]] = None, ) -> tuple[npt.NDArray[np.floating], npt.NDArray[np.floating]]: r"""Computes the solution to the continuous-time LQR problem. The LQR problem is to solve the following optimization problem .. math:: \min_{u} \int_{0}^{\infty} x^\top Q x + u^\top R u + 2 x^\top M u for the linear time-invariant continuous-time system .. math:: \dot{x} = A x + B u. The (famous) solution takes the form of a state feedback law .. math:: u = -K x with a quadratic cost-to-go function .. math:: V(x) = x^\top P x. The function returns the optimal state feedback matrix :math:`K` and the quadratic terminal cost-to-go matrix :math:`P`. If not provided, ``M`` is assumed to be zero. Parameters ---------- A : array State matrix. B : array Control input matrix. Q : array State weighting matrix. R : array Control input weighting matrix. M : array, optional Mixed state-input weighting matrix, by default ``None``. Returns ------- tuple of two arrays Returns the optimal state feedback matrix :math:`K` and the quadratic terminal cost-to-go matrix :math:`P`. """ P = _solve_continuous_are(A, B, Q, R, None, M) rhs = B.T.dot(P) if M is None else B.T.dot(P) + M.T K = np.linalg.solve(R, rhs) return K, P
[docs] def dlqr( A: npt.NDArray[np.floating], B: npt.NDArray[np.floating], Q: npt.NDArray[np.floating], R: npt.NDArray[np.floating], M: Optional[npt.NDArray[np.floating]] = None, ) -> tuple[npt.NDArray[np.floating], npt.NDArray[np.floating]]: r"""Computes the solution to the discrete-time LQR problem. The LQR problem is to solve the following optimization problem .. math:: \min_{u} \sum_{t=0}^{\infty} x_t^\top Q x_t + u_t^\top R u_t + 2 x_t^\top M u_t for the linear time-invariant discrete-time system .. math:: x_{t+1} = A x_t + B u_t. The (famous) solution takes the form of a state feedback law .. math:: u_t = -K x_t with a quadratic cost-to-go function .. math:: V(x_t) = x_t^\top P x_t. The function returns the optimal state feedback matrix :math:`K` and the quadratic terminal cost-to-go matrix :math:`P`. If not provided, ``M`` is assumed to be zero. Parameters ---------- A : array State matrix. B : array Control input matrix. Q : array State weighting matrix. R : array Control input weighting matrix. M : array, optional Mixed state-input weighting matrix, by default ``None``. Returns ------- tuple of two arrays Returns the optimal state feedback matrix :math:`K` and the quadratic terminal cost-to-go matrix :math:`P`. """ P = _solve_discrete_are(A, B, Q, R, None, M) rhs = B.T.dot(P).dot(A) if M is None else B.T.dot(P).dot(A) + M.T K = np.linalg.solve(B.T.dot(P).dot(B) + R, rhs) return K, P
[docs] def rk4( f: Callable[[SymOrNumType], SymOrNumType], x0: SymOrNumType, dt: float = 1, M: int = 1, ) -> SymOrNumType: r"""Computes the Runge-Kutta 4 integration of the given function ``f`` with initial state ``x0``. In mathematical terms, given a continuous-time dynamics function :math:`\dot{x} = f(x)`, this method returns a discretized version of the dynamics :math:`x_{k+1} = f_d(x_k)` using the Runge-Kutta 4 method. Parameters ---------- f : Callable[[casadi or array], casadi or array] A function that takes a state as input and returns the derivative of the state, i.e., continuous-time dynamics. x0 : casadi or array The initial state. Must be compatible as an argument to ``f``. dt : float, optional The discretization timestep, by default ``1``. M : int, optional How many RK4 steps to take in one ``dt`` interval, by default ``1``. Returns ------- new state : casadi or array The new state after ``dt`` time, according to the discretization. """ dt /= M x = x0 for _ in range(M): k1 = f(x) k2 = f(x + k1 * dt / 2) k3 = f(x + k2 * dt / 2) k4 = f(x + k3 * dt) x = x + (k1 + 2 * k2 + 2 * k3 + k4) * dt / 6 return x
[docs] def cbf( h: Callable[[SymType], SymType], x: SymType, u: SymType, dynamics: Callable[[SymType, SymType], SymType], alphas: _Iterable[Callable[[SymType], SymType]], ) -> SymType: r"""Continuous-time Control Barrier Function (CBF) for the given constraint ``h`` and system with dynamics ``dynamics``. This method constructs a CBF for the constraint :math:`h(x) \geq 0` using the given system's dynamics :math:`\dot{x} = f(x, u)`. Here, :math:`\dot{x}` is the time derivative of the state after applying control input :math:`u`, and :math:`f` is the dynamics function ``dynamics``. The method can also compute a High-Order (HO) CBF by passing more than one class :math:`\mathcal{K}` functions ``alphas``. As per [1]_, the HO-CBF :math:`\phi_m` of degree :math:`m` is recursively found as .. math:: \phi_m(x) = \dot{\phi}_{m-1}(x) + \alpha_m(\phi_{m-1}(x)) and should be imposed as the constraint :math:`\phi_m(x) \geq 0`. Parameters ---------- h : callable The constraint function for which to build the CBF. It must be of the signature :math:`x \rightarrow h(x)`. x : casadi SX or MX The state vector variable :math:`x`. u : casadi SX or MX The control input vector variable :math:`u`. dynamics : callable The dynamics function :math:`f` with signature :math:`x,u \rightarrow f(x, u)`. alphas : iterable of callables An iterable of class :math:`\mathcal{K}` functions :math:`\alpha_m` for the HO-CBF. The length of the iterable determines the degree of the HO-CBF. Returns ------- casadi SX or MX Returns the HO-CBF function :math:`\phi_m` as a symbolic variable that is function of the provided ``x`` and ``u``. References ---------- .. [1] Wei Xiao and Calin Belta. High-order control barrier functions. *IEEE Transactions on Automatic Control*, 67(7), 3655-3662, 2021. Examples -------- >>> import casadi as cs >>> A = cs.SX.sym("A", 2, 2) >>> B = cs.SX.sym("B", 2, 1) >>> x = cs.SX.sym("x", A.shape[0], 1) >>> u = cs.SX.sym("u", B.shape[1], 1) >>> dynamics = lambda x, u: A @ x + B @ u >>> M = cs.SX.sym("M") >>> c = cs.SX.sym("c") >>> gamma = cs.SX.sym("gamma") >>> alphas = [lambda z: gamma * z] >>> h = lambda x: M - c * x[0] # >= 0 >>> cbf = cbf(h, x, u, dynamics, alphas) >>> print(cbf) """ x_dot = dynamics(x, u) phi = h(x) for alpha in alphas: phi = _lie_derivative(phi, x, x_dot) + alpha(phi) # cs.depends_on(phi, u) # phi.which_depends("u", [name], 2, True)[0] return phi
[docs] def dcbf( h: Callable[[SymType], SymType], x: SymType, u: SymType, dynamics: Callable[[SymType, SymType], SymType], alphas: _Iterable[Callable[[SymType], SymType]], ) -> cs.Function: r"""Discrete-time Control Barrier Function (DCBF) for the given constraint ``h`` and system with dynamics ``dynamics``. This method constructs a DCBF for the constraint :math:`h(x) \geq 0` using the given system's dynamics :math:`x_{+} = f(x, u)`. Here, :math:`x_{+}` is the next state after applying control input :math:`u`, and :math:`f` is the dynamics function ``dynamics``. The method can also compute a High-Order (HO) DCBF by passing more than one class :math:`\mathcal{K}` functions ``alphas``. As per [1]_, the HO-DCBF :math:`\phi_m` of degree :math:`m` is recursively found as .. math:: \phi_m(x_k) = \phi_{m-1}(x_{k+1}) - \phi_{m-1}(x_k) + \alpha_m(\phi_{m-1}(x_k)) and should be imposed as the constraint :math:`\phi_m(x_k) \geq 0`. Parameters ---------- h : callable The constraint function for which to build the DCBF. It must be of the signature :math:`x \rightarrow h(x)`. x : casadi SX or MX The state vector variable :math:`x`. u : casadi SX or MX The control input vector variable :math:`u`. dynamics : callable The dynamics function :math:`f` with signature :math:`x,u \rightarrow f(x, u)`. alphas : iterable of callables An iterable of class :math:`\mathcal{K}` functions :math:`\alpha_m` for the HO-DCBF. The length of the iterable determines the degree of the HO-DCBF. Returns ------- casadi SX or MX Returns the HO-DCBF function :math:`\phi_m` as a symbolic variable that is function of the provided ``x`` and ``u``. References ---------- .. [1] Yuhang Xiong, Di-Hua Zhai, Mahdi Tavakoli, Yuanqing Xia. Discrete-time control barrier function: High-order case and adaptive case. *IEEE Transactions on Cybernetics*, 53(5), 3231-3239, 2022. Examples -------- >>> import casadi as cs >>> A = cs.SX.sym("A", 2, 2) >>> B = cs.SX.sym("B", 2, 1) >>> x = cs.SX.sym("x", A.shape[0], 1) >>> u = cs.SX.sym("u", B.shape[1], 1) >>> dynamics = lambda x, u: A @ x + B @ u >>> M = cs.SX.sym("M") >>> c = cs.SX.sym("c") >>> gamma = cs.SX.sym("gamma") >>> alphas = [lambda z: gamma * z] >>> h = lambda x: M - c * x[0] # >= 0 >>> cbf = dcbf(h, x, u, dynamics, alphas) >>> print(cbf) """ x_next = dynamics(x, u) phi = h(x) for alpha in alphas: phi_next = cs.substitute(phi, x, x_next) phi = phi_next - phi + alpha(phi) return phi
[docs] def iccbf( h: Callable[[SymType], SymType], x: SymType, u: SymType, dynamics_f_and_g: Callable[[SymType], tuple[SymType, SymType]], alphas: _Iterable[Callable[[SymType], SymType]], norm: float, bound: float = 1.0, ) -> cs.Function: r"""Continuous-time Input Constrained Control Barrier Function (ICCBF) for the given constraint ``h`` and system with dynamics ``dynamics_f_and_g`` subject to norm-bounded control action. This method constructs an ICCBF for the constraint :math:`h(x) \geq 0` using the given system's control-affine dynamics :math:`\dot{x} = f(x) + g(x) u`. Here, :math:`\dot{x}` is the time derivative of the state after applying control input :math:`u`, and :math:`f` and :math:`g` are returned by the dynamics function ``dynamics_f_and_g``. The ICCBF takes into account the norm-bounded control input :math:`|| u ||_p \leq b`, where the norm power :math:`p` and bound :math:`b` are specified as ``norm`` and ``bound``. The method can also compute a High-Order (HO) ICCBF by passing more than one class :math:`\mathcal{K}` functions ``alphas``. As per [1]_, the HO-ICCBF :math:`\phi_m` of degree :math:`m` is recursively found as .. math:: \phi_m(x) = L_f \phi_{m-1}(x) + \inf_{u \in \mathcal{U}} \left\{ L_g \phi_{m-1}(x) u \right\} + \alpha_m(\phi_{m-1}(x)) and should be imposed as the constraint :math:`\phi_m(x) \geq 0`. Parameters ---------- h : callable The constraint function for which to build the ICCBF. It must be of the signature :math:`x \rightarrow h(x)`. x : casadi SX or MX The state vector variable :math:`x`. u : casadi SX or MX The control input vector variable :math:`u`. dynamics_f_and_g : callable A callable computing, for the given state, the dynamics components :math:`f(x)` and :math:`g(x)`, i.e., the signature is :math:`x \rightarrow f(x), g(x)`. alphas : iterable of callables An iterable of class :math:`\mathcal{K}` functions :math:`\alpha_m` for the HO-ICCBF. The length of the iterable determines the degree of the HO-ICCBF. norm : float The norm power :math:`p` for the control input norm constraint :math:`||u||_p \le b`, with :math:`1 \le p \le \infty`. bound : float, optional The bound :math:`b` for the control input norm constraint :math:`||u||_p \le b`, by default ``1.0``. Must be larger than zero. Returns ------- casadi SX or MX Returns the HO-ICCBF function :math:`\phi_m` as a symbolic variable that is function of the provided ``x`` and ``u``. References ---------- .. [1] Devansh R. Agrawal and Dimitra Panagou. Safe control synthesis via input constrained control barrier functions. In *60th IEEE Conference on Decision and Control (CDC)*, 6113-6118, 2021. Examples -------- >>> import casadi as cs >>> A = cs.SX.sym("A", 2, 2) >>> B = cs.SX.sym("B", 2, 1) >>> x = cs.SX.sym("x", A.shape[0], 1) >>> u = cs.SX.sym("u", B.shape[1], 1) >>> dynamics_f_and_g = lambda x: (A @ x, B) >>> M = cs.SX.sym("M") >>> c = cs.SX.sym("c") >>> gamma = cs.SX.sym("gamma") >>> alphas = [lambda z: gamma * z] >>> h = lambda x: M - c * x[0] # >= 0 >>> cbf = iccbf(h, x, u, dynamics_f_and_g, alphas, norm=2, bound=0.5) >>> print(cbf) """ # find the dual norm of the given norm y = cs.SX.sym("y", u.shape[0], 1) dual_norm_func = cs.Function("dual_norm", (y,), (_dual_norm(y, norm),)) # continue from here as usual f, g = dynamics_f_and_g(x) phi = h(x) for alpha in alphas: Lf_phi = _lie_derivative(phi, x, f) Lg_phi = _lie_derivative(phi, x, g) Lg_phi_u_sup = bound * dual_norm_func(Lg_phi) alpha_phi = alpha(phi) phi = Lf_phi - Lg_phi_u_sup + alpha_phi return Lf_phi + Lg_phi * u + alpha_phi # skip infimum in last iteration