Source code for qics.cones.symmetric.secondorder

# Copyright (c) 2024, Kerry He, James Saunderson, and Hamza Fawzi

# This Python package QICS is licensed under the MIT license; see LICENSE.md
# file in the root directory or at https://github.com/kerry-he/qics

import numpy as np
import scipy as sp

from qics.cones.base import SymCone



[docs]
class SecondOrder(SymCone):
    r"""A class representing a second order cone

    .. math::

        \mathcal{Q}_{n+1} = \{ (t, x) \in \mathbb{R} \times \mathbb{R}^{n}
        : t \geq \| x \|_2 \}.

    Parameters
    ----------
    n : :obj:`int`
        Dimension of the vector :math:`x`, i.e., how many terms are in the
        Euclidean norm.
    """

    def __init__(self, n):
        self.n = n

        self.nu = 1  # Barrier parameter

        self.dim = [1, n]
        self.type = ["r", "r"]

        # Update flags
        self.grad_updated = False
        self.congr_aux_updated = False
        self.nt_aux_updated = False

        return

    def get_init_point(self, out):
        point = [np.array([[1.0]]), np.zeros((self.n, 1))]

        self.set_point(point, point)

        out[0][:] = point[0]
        out[1][:] = point[1]

        return out

    def get_feas(self):
        self.feas_updated = True

        self.x, self.z = self.primal, self.dual

        # Check that x0 > ||x1||_2
        if _soc_res(self.x) <= 0:
            self.feas = False
            return self.feas

        # Check that z0 > ||z1||_2
        if self.z is not None:
            if _soc_res(self.z) <= 0:
                self.feas = False
                return self.feas

        self.feas = True
        return self.feas

    def get_dual_feas(self):
        self.z = self.dual
        return _soc_res(self.z) > 0

    def get_val(self):
        return -0.5 * np.log(_soc_res(self.x))

    def update_grad(self):
        assert self.feas_updated
        assert not self.grad_updated

        self.x_res = _soc_res(self.x)
        self.x_res_inv = 1 / self.x_res

        self.grad = [-self.x[0] * self.x_res_inv, self.x[1] * self.x_res_inv]

        self.grad_updated = True

    def hess_prod_ip(self, out, H):
        assert self.grad_updated

        (Ht, Hx) = H

        x_res_inv2 = self.x_res_inv * self.x_res_inv
        coeff = 2 * x_res_inv2 * (self.x[0] * Ht - self.x[1].T @ Hx)

        out[0][:] = coeff * self.x[0] - Ht * self.x_res_inv
        out[1][:] = -coeff * self.x[1] + Hx * self.x_res_inv

        return out

    def hess_congr(self, A):
        assert self.grad_updated
        if not self.congr_aux_updated:
            self.congr_aux(A)

        # First term, i.e., 2/(x'Jx)^2 * AJxx'JA
        lhs = self.x[0] * self.At.T - self.x[1].T @ self.Ax.T
        lhs *= np.sqrt(2.0) * self.x_res_inv
        out = lhs.T @ lhs

        # Second term, i.e., -1/(x'Jx) * AJA
        out -= (self.At @ self.At.T) * self.x_res_inv
        out += (self.Ax @ self.Ax.T) * self.x_res_inv

        return out

    def invhess_prod_ip(self, out, H):
        assert self.grad_updated

        (Ht, Hx) = H

        coeff = 2 * (self.x[0] * Ht + self.x[1].T @ Hx)

        out[0][:] = coeff * self.x[0] - Ht * self.x_res
        out[1][:] = coeff * self.x[1] + Hx * self.x_res

        return out

    def invhess_congr(self, A):
        assert self.grad_updated
        if not self.congr_aux_updated:
            self.congr_aux(A)

        # First term, i.e., 2 * Axx'A
        lhs = self.x[0] * self.At.T + self.x[1].T @ self.Ax.T
        lhs *= np.sqrt(2.0)
        out = lhs.T @ lhs

        # Second term, i.e., -(x'Jx) * AJA
        out -= (self.At @ self.At.T) * self.x_res
        out += (self.Ax @ self.Ax.T) * self.x_res

        return out

    def third_dir_deriv_axpy(self, out, H, a=True):
        assert self.grad_updated

        (Ht, Hx) = H

        x_res_inv2 = self.x_res_inv * self.x_res_inv
        x_res_inv3 = self.x_res_inv * x_res_inv2

        # Gradients of f(t, x) = t^2 - x'x
        DPhit = 2 * self.x[0]
        DPhix = -2 * self.x[1]

        DPhitH = Ht * DPhit
        DPhixH = Hx.T @ DPhix

        # Hessians of f(t, x) = t^2 - x'x
        D2PhittH = 2 * Ht
        D2PhixxH = -2 * Hx

        D2PhitHH = Ht * D2PhittH
        D2PhixHH = Hx.T @ D2PhixxH

        # Third order derivatives of barrier
        coeff1 = DPhitH + DPhixH
        coeff2 = x_res_inv2 * (D2PhitHH + D2PhixHH)
        coeff2 -= 2 * x_res_inv3 * coeff1 * coeff1
        coeff2 *= 0.5

        dder3_t = coeff2 * DPhit
        dder3_t += coeff1 * x_res_inv2 * D2PhittH

        dder3_x = coeff2 * DPhix
        dder3_x += coeff1 * x_res_inv2 * D2PhixxH

        out[0][:] += dder3_t * a
        out[1][:] += dder3_x * a

        return out

    # ==========================================================================
    # Functions specific to symmetric cones for NT scaling
    # ==========================================================================
    # We want the NT scaling point w and scaled variable lambda such that
    #     H(w) s = z  <==> lambda := W^-T s = W z
    # where H(w) = W^T W.
    # See: [Section 4.1]https://www.seas.ucla.edu/~vandenbe/publications/coneprog.pdf

    def nt_aux(self):
        assert not self.nt_aux_updated
        self.x_res = _soc_res(self.x)
        self.z_res = _soc_res(self.z)

        self.rt_x_res = np.sqrt(self.x_res)
        self.rt_z_res = np.sqrt(self.z_res)

        x_bar = [self.x[0] / self.rt_x_res, self.x[1] / self.rt_x_res]
        z_bar = [self.z[0] / self.rt_z_res, self.z[1] / self.rt_z_res]

        xz_bar = x_bar[0] * z_bar[0] + x_bar[1].T @ z_bar[1]
        gamma = np.sqrt((1.0 + xz_bar) / 2.0)

        # Scaling point
        # w_bar = (s_bar + J z_bar) / 2*gamma
        self.w_res = self.rt_x_res / self.rt_z_res
        self.rt_w_res = np.sqrt(self.w_res)
        self.w_bar = [
            (x_bar[0] + z_bar[0]) / (2 * gamma),
            (x_bar[1] - z_bar[1]) / (2 * gamma),
        ]
        # w_bar^1/2 = (w_bar + e) / (2 * (w0_bar + 1))^1/2
        self.rt_w_bar = [
            (1 + self.w_bar[0]) / np.sqrt(2 * (self.w_bar[0] + 1)),
            self.w_bar[1] / np.sqrt(2 * (self.w_bar[0] + 1)),
        ]
        # w = (x'Jx / z'Jz)^1/4 * w_bar
        self.w = [self.w_bar[0] * self.rt_w_res, self.w_bar[1] * self.rt_w_res]

        # Scaled variable
        # lambda0_bar = gamma
        # lambda1_bar = ((gamma + z0_bar)*s1_bar + (gamma + s0_bar)*z1_bar)
        #              -----------------------------------------------------
        #                          (s0_bar + z0_bar + 2*gamma)
        temp = (gamma + z_bar[0]) * x_bar[1] + (gamma + x_bar[0]) * z_bar[1]
        self.lmbda_bar = [gamma, temp / (x_bar[0] + z_bar[0] + 2 * gamma)]
        # lambda = ((x'Jx)(z'Jz))^1/4 * lambda_bar
        self.lmbda = [
            self.lmbda_bar[0] * np.sqrt(self.rt_x_res * self.rt_z_res),
            self.lmbda_bar[1] * np.sqrt(self.rt_x_res * self.rt_z_res),
        ]
        self.lmbda_res = _soc_res(self.lmbda)
        self.rt_lmbda_res = np.sqrt(self.lmbda_res)

        self.nt_aux_updated = True

    def nt_prod_ip(self, out, H):
        if not self.nt_aux_updated:
            self.nt_aux()

        (Ht, Hx) = H

        w_res_inv2 = 1 / (self.w_res * self.w_res)
        coeff = 2 * w_res_inv2 * (self.w[0] * Ht - self.w[1].T @ Hx)

        out[0][:] = coeff * self.w[0] - Ht / self.w_res
        out[1][:] = -coeff * self.w[1] + Hx / self.w_res

        return out

    def nt_congr(self, A):
        if not self.nt_aux_updated:
            self.nt_aux()
        if not self.congr_aux_updated:
            self.congr_aux(A)

        # First term, i.e., 2/(w'Jw)^2 * AJww'JA
        lhs = self.w[0] * self.At.T - self.w[1].T @ self.Ax.T
        lhs *= np.sqrt(2.0) / self.w_res
        out = lhs.T @ lhs

        # Second term, i.e., -(w'Jw) * AJA
        out -= (self.At @ self.At.T) / self.w_res
        out += (self.Ax @ self.Ax.T) / self.w_res

        return out

    def invnt_prod_ip(self, out, H):
        if not self.nt_aux_updated:
            self.nt_aux()

        (Ht, Hx) = H

        coeff = 2 * (self.w[0] * Ht + self.w[1].T @ Hx)

        out[0][:] = coeff * self.w[0] - Ht * self.w_res
        out[1][:] = coeff * self.w[1] + Hx * self.w_res

        return out

    def invnt_congr(self, A):
        if not self.nt_aux_updated:
            self.nt_aux()
        if not self.congr_aux_updated:
            self.congr_aux(A)

        # First term, i.e., 2 * Aww'A
        lhs = self.w[0] * self.At.T + self.w[1].T @ self.Ax.T
        lhs *= np.sqrt(2.0)
        out = lhs.T @ lhs

        # Second term, i.e., -(w'Jw) * AJA
        out -= (self.At @ self.At.T) * self.w_res
        out += (self.Ax @ self.Ax.T) * self.w_res

        return out

    def comb_dir(self, out, ds, dz, sigma_mu):
        # Compute the residual for rs where rs is given as the lhs of
        #     Lambda o (W dz + W^-T ds) = -Lambda o Lambda - (W^-T ds_a) o (W dz_a)
        #                                 + sigma * mu * 1
        # which is rearranged into the form H ds + dz = rs, i.e.,
        #     rs := W^-1 [ Lambda \ (-Lambda o Lambda - (W^-T ds_a) o (W dz_a) + sigma*mu 1) ]
        # See: [Section 5.4]https://www.seas.ucla.edu/~vandenbe/publications/coneprog.pdf
        if not self.nt_aux_updated:
            self.nt_aux()

        # Compute (W^-T ds_a) o (W dz_a)
        rtw_ds = 2 * (self.rt_w_bar[0] * ds[0] - self.rt_w_bar[1].T @ ds[1])
        W_ds = [
            (rtw_ds * self.rt_w_bar[0] - ds[0]) / self.rt_w_res,
            (-rtw_ds * self.rt_w_bar[1] + ds[1]) / self.rt_w_res,
        ]

        rt2_dz = 2 * (self.rt_w_bar[0] * dz[0] + self.rt_w_bar[1].T @ dz[1])
        W_dz = [
            (rt2_dz * self.rt_w_bar[0] - dz[0]) * self.rt_w_res,
            (rt2_dz * self.rt_w_bar[1] + dz[1]) * self.rt_w_res,
        ]

        Wds_Wdz = _soc_prod(W_ds, W_dz)

        # Compute -Lambda o Lambda - [ ... ] + sigma*mu I
        lmbda_lmbda = _soc_prod(self.lmbda, self.lmbda)
        rhs = [
            -lmbda_lmbda[0] - Wds_Wdz[0] + sigma_mu,
            -lmbda_lmbda[1] - Wds_Wdz[1],
        ]

        # Compute Lambda \ [ ... ]
        lmbda_rhs = self.lmbda[1].T @ rhs[1]
        temp = self.lmbda_res * rhs[1] + lmbda_rhs * self.lmbda[1]
        work = [
            (self.lmbda[0] * rhs[0] - lmbda_rhs) / self.lmbda_res,
            (-rhs[0] * self.lmbda[1] + temp / self.lmbda[0]) / self.lmbda_res,
        ]

        # Compute W^-1 [ ... ]
        temp = 2 * (self.rt_w_bar[0] * work[0] - self.rt_w_bar[1].T @ work[1])
        out[0][:] = (temp * self.rt_w_bar[0] - work[0]) / self.rt_w_res
        out[1][:] = (-temp * self.rt_w_bar[1] + work[1]) / self.rt_w_res

    def step_to_boundary(self, ds, dz):
        # Compute the maximum step alpha in [0, 1] we can take such that
        #     s + alpha ds >= 0
        #     z + alpha dz >= 0
        # See: [Section 8.3]https://www.seas.ucla.edu/~vandenbe/publications/coneprog.pdf
        if not self.nt_aux_updated:
            self.nt_aux()

        # Compute (W^-T ds_a) and (W dz_a)
        rtw_ds = 2 * (self.rt_w_bar[0] * ds[0] - self.rt_w_bar[1].T @ ds[1])
        W_ds = [
            (rtw_ds * self.rt_w_bar[0] - ds[0]) / self.rt_w_res,
            (-rtw_ds * self.rt_w_bar[1] + ds[1]) / self.rt_w_res,
        ]

        rtw_dz = 2 * (self.rt_w_bar[0] * dz[0] + self.rt_w_bar[1].T @ dz[1])
        W_dz = [
            (rtw_dz * self.rt_w_bar[0] - dz[0]) * self.rt_w_res,
            (rtw_dz * self.rt_w_bar[1] + dz[1]) * self.rt_w_res,
        ]

        # Compute rho = H(lambda)^1/2 W^-T ds_a
        temp = self.lmbda_bar[0] * W_ds[0] - self.lmbda_bar[1].T @ W_ds[1]
        temp2 = (temp + W_ds[0]) / (1 + self.lmbda_bar[0]) * self.lmbda_bar[1]
        rho = [temp / self.rt_lmbda_res, (W_ds[1] - temp2) / self.rt_lmbda_res]

        # Compute sig = H(lambda)^1/2 W dz_a
        temp = self.lmbda_bar[0] * W_dz[0] - self.lmbda_bar[1].T @ W_dz[1]
        temp2 = (temp + W_dz[0]) / (1 + self.lmbda_bar[0]) * self.lmbda_bar[1]
        sig = [temp / self.rt_lmbda_res, (W_dz[1] - temp2) / self.rt_lmbda_res]

        # Maximum step is given by
        #     alpha := 1 / max(0, ||rho1||_2 - rho0, ||sig1||_2 - sig0)
        # Clamp this step between 0 and 1
        rho_step = (rho[0] - np.sqrt(rho[1].T @ rho[1]))[0, 0]
        sig_step = (sig[0] - np.sqrt(sig[1].T @ sig[1]))[0, 0]

        if rho_step >= 0 and sig_step >= 0:
            return 1.0
        else:
            return 1.0 / max(-rho_step, -sig_step)

    # ==========================================================================
    # Auxilliary functions
    # ==========================================================================
    def congr_aux(self, A):
        assert not self.congr_aux_updated

        if sp.sparse.issparse(A):
            A = A.tocsr()

        self.At = A[:, [0]]
        self.Ax = A[:, 1:]

        self.congr_aux_updated = True



def _soc_res(x):
    return (x[0] * x[0] - x[1].T @ x[1])[0, 0]


def _soc_prod(x, y):
    return [x[0] * y[0] + x[1].T @ y[1], x[0] * y[1] + y[0] * x[1]]