Source code for qics.cones.entropy.quantentr

# Copyright (c) 2024, Kerry He, James Saunderson, and Hamza Fawzi

# This Python package QICS is licensed under the MIT license; see LICENSE.md
# file in the root directory or at https://github.com/kerry-he/qics

import numpy as np
import scipy as sp

from qics._utils.gradient import D1_log, D2_log, scnd_frechet
from qics._utils.linalg import congr_multi, dense_dot_x, inp
from qics.cones.base import Cone, get_central_ray_entr
from qics.vectorize import vec_to_mat



[docs]
class QuantEntr(Cone):
    r"""A class representing a (homogenized) quantum entropy cone

    .. math::

        \mathcal{QE}_{n} = \text{cl}\{ (t, u, X) \in \mathbb{R} \times
        \mathbb{R}_{++} \times \mathbb{H}^n_{++} : t \geq -u S(u^{-1}X) \},

    where

    .. math::

        S(X) = -\text{tr}[X \log(X)],

    is the quantum (von Neumann) entropy function.

    Parameters
    ----------
    n : :obj:`int`
        Dimension of the matrix :math:`X`.
    iscomplex : :obj:`bool`
        Whether the matrix :math:`X` is defined over :math:`\mathbb{H}^n`
        (``True``), or restricted to :math:`\mathbb{S}^n` (``False``). The
        default is ``False``.

    See also
    --------
    ClassEntr : (Homogenized) classical entropy cone
    QuantRelEntr : Quantum relative entropy cone

    Notes
    -----
    The epigraph of the quantum entropy can be obtained by enforcing the
    linear constraint :math:`u=1`.
    """

    def __init__(self, n, iscomplex=False):
        # Dimension properties
        self.n = n
        self.iscomplex = iscomplex

        self.nu = 2 + self.n  # Barrier parameter

        if iscomplex:
            self.dim = [1, 1, 2 * n * n]
            self.type = ["r", "r", "h"]
            self.dtype = np.complex128
        else:
            self.dim = [1, 1, n * n]
            self.type = ["r", "r", "s"]
            self.dtype = np.float64

        # Update flags
        self.feas_updated = False
        self.grad_updated = False
        self.hess_aux_updated = False
        self.invhess_aux_updated = False
        self.congr_aux_updated = False
        self.dder3_aux_updated = False

        return

    def get_iscomplex(self):
        return self.iscomplex

    def get_init_point(self, out):
        (t0, u0, x0) = get_central_ray_entr(self.n)

        point = [
            np.array([[t0]]),
            np.array([[u0]]),
            np.eye(self.n, dtype=self.dtype) * x0,
        ]

        self.set_point(point, point)

        out[0][:] = point[0]
        out[1][:] = point[1]
        out[2][:] = point[2]

        return out

    def get_feas(self):
        if self.feas_updated:
            return self.feas

        self.feas_updated = True

        (self.t, self.u, self.X) = self.primal

        # Check that u and X are positive (definite)
        self.Dx, self.Ux = np.linalg.eigh(self.X)

        if (self.u <= 0) or any(self.Dx <= 0):
            self.feas = False
            return self.feas

        # Check that t > -u S(X/u) = tr[X log(X)] - tr[X] log(u)
        self.trX = np.trace(self.X).real
        self.log_Dx = np.log(self.Dx)
        self.log_u = np.log(self.u[0, 0])

        entr_X = inp(self.Dx, self.log_Dx)
        entr_Xu = self.trX * self.log_u
        self.z = self.t[0, 0] - (entr_X - entr_Xu)

        self.feas = self.z > 0
        return self.feas

    def get_val(self):
        return -np.log(self.z) - np.sum(self.log_Dx) - self.log_u

    def update_grad(self):
        assert self.feas_updated
        assert not self.grad_updated

        # Compute gradients of quantum entropy
        # D_u S(u, X) = -tr[X] / u
        self.ui = np.reciprocal(self.u)
        self.DPhiu = -self.trX * self.ui
        # D_X S(u, X) = log(X) + (1 - log(u)) I
        log_X = (self.Ux * self.log_Dx) @ self.Ux.conj().T
        log_X = (log_X + log_X.conj().T) * 0.5
        self.DPhiX = log_X + np.eye(self.n) * (1.0 - self.log_u)

        # Compute X^-1
        inv_Dx = np.reciprocal(self.Dx)
        inv_X_rt2 = self.Ux * np.sqrt(inv_Dx)
        self.inv_X = inv_X_rt2 @ inv_X_rt2.conj().T

        # Compute gradient of barrier function
        self.zi = np.reciprocal(self.z)

        self.grad = [
            -self.zi,
            self.zi * self.DPhiu - self.ui,
            self.zi * self.DPhiX - self.inv_X,
        ]

        self.grad_updated = True

    def hess_prod_ip(self, out, H):
        assert self.grad_updated
        if not self.hess_aux_updated:
            self.update_hessprod_aux()

        (Ht, Hu, Hx) = H

        UxHxUx = self.Ux.conj().T @ Hx @ self.Ux

        # Hessian product of quantum entropy
        # D2_uu S(u, X) [Hu] = tr[X] Hu / u^2
        D2PhiuuH = self.trX * Hu * self.ui2
        # D2_uX S(u, X) [Hx] = -tr[Hx] / u
        D2PhiuXH = -np.trace(Hx).real * self.ui
        # D2_Xu S(u, X) [Hu] = -I Hu / u
        D2PhiXuH = -np.eye(self.n) * Hu * self.ui
        # D2_XX S(u, X) [Hx] =  Ux [log^[1](Dx) .* (Ux' Hx Ux)] Ux'
        D2PhiXXH = self.Ux @ (self.D1x_log * UxHxUx) @ self.Ux.conj().T

        # ======================================================================
        # Hessian products with respect to t
        # ======================================================================
        # D2_t F(t, u, X)[Ht, Hu, Hx]
        #         = (Ht - D_u S(u, X)[Hu] - D_X S(u, X)[Hx]) / z^2
        out[0][:] = (Ht - self.DPhiu * Hu - inp(self.DPhiX, Hx)) * self.zi2

        # ======================================================================
        # Hessian products with respect to u
        # ======================================================================
        # Hessian product of barrier function
        # D2_u F(t, u, X)[Ht, Hu, Hx]
        #         = -D2_t F(t, u, X)[Ht, Hu, Hx] * D_u S(u, X)
        #           + (D2_uu S(u, X)[Hu] + D2_uX S(u, X)[Hx]) / z
        #           + Hu / u^2
        out_u = -out[0] * self.DPhiu
        out_u += self.zi * (D2PhiuuH + D2PhiuXH)
        out_u += Hu * self.ui2
        out[1][:] = out_u

        # ======================================================================
        # Hessian products with respect to X
        # ======================================================================
        # D2_X F(t, u, X)[Ht, Hu, Hx]
        #         = -D2_t F(t, u, X)[Ht, Hu, Hx] * D_X S(u, X)
        #           + (D2_Xu S(u, X)[Hu] + D2_XX S(u, X)[Hx]) / z
        #           + X^-1 Hx X^-1
        out_X = -out[0] * self.DPhiX
        out_X += self.zi * (D2PhiXuH + D2PhiXXH)
        out_X += self.inv_X @ Hx @ self.inv_X
        out_X = (out_X + out_X.conj().T) * 0.5
        out[2][:] = out_X

        return out

    def hess_congr(self, A):
        assert self.grad_updated
        if not self.hess_aux_updated:
            self.update_hessprod_aux()
        if not self.congr_aux_updated:
            self.congr_aux(A)

        p = A.shape[0]
        lhs = np.zeros((p, sum(self.dim)))

        work1, work2, work3 = self.work1, self.work2, self.work3

        # ======================================================================
        # Hessian products with respect to t
        # ======================================================================
        # D2_t F(t, u, X)[Ht, Hu, Hx]
        #         = (Ht - D_u S(u, X)[Hu] - D_X S(u, X)[Hx]) / z^2
        DPhiX_vec = self.DPhiX.view(np.float64).reshape((-1, 1))
        out_t = self.At - (self.Ax_vec @ DPhiX_vec).ravel()
        out_t -= self.Au * self.DPhiu[0, 0]
        out_t *= self.zi2

        lhs[:, 0] = out_t

        # ======================================================================
        # Hessian products with respect to u
        # ======================================================================
        # Hessian products for quantum entropy
        # D2_uu S(u, X) [Hu] = tr[X] Hu / u^2
        D2PhiuuH = (self.trX * self.ui2) * self.Au
        # D2_uX S(u, X) [Hx] = -tr[Hx] / u
        D2PhiuXH = np.trace(self.Ax, axis1=1, axis2=2).real * self.ui

        # Hessian product of barrier function
        # D2_u F(t, u, X)[Ht, Hu, Hx]
        #         = -D2_t F(t, u, X)[Ht, Hu, Hx] * D_u S(u, X)
        #           + (D2_uu S(u, X)[Hu] + D2_uX S(u, X)[Hx]) / z
        #           + Hu / u^2
        out_u = -out_t * self.DPhiu
        out_u += self.zi * (D2PhiuuH - D2PhiuXH)
        out_u += self.Au * self.ui2

        lhs[:, 1] = out_u

        # ====================================================================
        # Hessian products with respect to X
        # ====================================================================
        # Hessian products for quantum entropy
        # D2_XX S(u, X) [Hx] =  Ux [log^[1](Dx) .* (Ux' Hx Ux)] Ux'
        congr_multi(self.work1, self.Ux.conj().T, self.Ax, work2)
        work1 *= self.D1x_comb
        congr_multi(work3, self.Ux, work1, work2)
        # D2_Xu S(u, X) [Hu] = -I Hu / u
        work = (self.zi * self.ui[0, 0]) * self.Au.reshape(-1, 1)
        work3[:, range(self.n), range(self.n)] -= work

        # Hessian product of barrier function
        # D2_X F(t, u, X)[Ht, Hu, Hx]
        #         = -D2_t F(t, u, X)[Ht, Hu, Hx] * D_X S(u, X)
        #           + (D2_Xu S(u, X)[Hu] + D2_XX S(u, X)[Hx]) / z
        #           + X^-1 Hx X^-1
        np.outer(out_t, self.DPhiX, out=work1.reshape((p, -1)))
        work3 -= work1

        lhs[:, 2:] = work3.reshape((p, -1)).view(np.float64)

        # Multiply A (H A')
        return dense_dot_x(lhs, A.T)

    def invhess_prod_ip(self, out, H):
        assert self.grad_updated
        if not self.hess_aux_updated:
            self.update_hessprod_aux()
        if not self.invhess_aux_updated:
            self.update_invhessprod_aux()

        (Ht, Hu, Hx) = H

        Wu = Hu + Ht * self.DPhiu
        Wx = Hx + Ht * self.DPhiX

        work = self.Ux.conj().T @ Wx @ self.Ux
        N_inv_Wx = self.Ux @ (self.D1x_comb_inv * work) @ self.Ux.conj().T
        N_inv_Wx = (N_inv_Wx + N_inv_Wx.conj().T) * 0.5

        # ======================================================================
        # Inverse Hessian products with respect to u
        # ======================================================================
        # u = (Wu + tr[N \ Wx] / z) / ((1 + tr[X]/z) / u^2 - tr[N \ I] / z^2)
        out_u = Wu * self.uz2 + np.trace(N_inv_Wx).real * self.uz
        out_u /= (self.z2 + self.trX * self.z) - self.tr_N_inv_I
        out[1][:] = out_u

        # ======================================================================
        # Inverse Hessian products with respect to X
        # ======================================================================
        # X = (N \ Wx) + u / z (N \ I)
        out_X = N_inv_Wx + (out_u / self.uz) * self.N_inv_I
        out[2][:] = out_X

        # ======================================================================
        # Inverse Hessian products with respect to t
        # ======================================================================
        # t = z^2 Ht + <DS(u, X), (u, X)>
        out_t = self.z2 * Ht + self.DPhiu * out_u + inp(self.DPhiX, out_X)
        out[0][:] = out_t

        return out

    def invhess_congr(self, A):
        assert self.grad_updated
        if not self.hess_aux_updated:
            self.update_hessprod_aux()
        if not self.invhess_aux_updated:
            self.update_invhessprod_aux()
        if not self.congr_aux_updated:
            self.congr_aux(A)

        # The inverse Hessian product applied on (Ht, Hu, Hx) for the QE barrier
        # is
        #     (u, X) =  M \ (Wu, Wx)
        #         t  =  z^2 Ht + <DPhi(u, X), (u, X)>
        # where (Wu, Wx) = [(Hu, Hx) + Ht DPhi(u, X)]
        #     M = [ (1 + tr[X]/z) / u^2   -vec(I)' / zu ]
        #         [    -vec(I) / zu              N      ]
        # and
        #     N = (Ux kron Ux) (1/z log + inv)^[1](Dx) (Ux' kron Ux')
        #
        # To solve linear systems with M, we simplify it by doing block
        # elimination, in which case we get
        #     u = (Wu + tr[N \ Wx] / z) / ((1 + tr[X]/z) / u^2 - tr[N \ I] / z^2)
        #     X = (N \ Wx) + u / z (N \ I)

        p = A.shape[0]
        lhs = np.zeros((p, sum(self.dim)))

        work1, work2, work3 = self.work1, self.work2, self.work3

        # Compute Wu
        Wu = self.Au + self.At * self.DPhiu[0, 0]
        # Compute Wx
        np.outer(self.At, self.DPhiX, out=work2.reshape((p, -1)))
        np.add(self.Ax, work2, out=work1)

        # Compute N \ Wx
        congr_multi(work2, self.Ux.conj().T, work1, work3)
        work2 *= self.D1x_comb_inv
        congr_multi(work1, self.Ux, work2, work3)

        # ======================================================================
        # Inverse Hessian products with respect to u
        # ======================================================================
        # u = (Wu + tr[N \ Wx] / z) / ((1 + tr[X]/z) / u^2 - tr[N \ I] / z^2)
        tr_N_inv_Wx = np.trace(work1, axis1=1, axis2=2).real
        out_u = Wu * self.uz2[0, 0] + tr_N_inv_Wx * self.uz[0, 0]
        out_u /= (self.z2 + self.trX * self.z) - self.tr_N_inv_I
        lhs[:, 1] = out_u

        # ======================================================================
        # Inverse Hessian products with respect to X
        # ======================================================================
        # X = (N \ Wx) + u / z (N \ I)
        np.outer(out_u / self.uz, self.N_inv_I, out=work2.reshape((p, -1)))
        work1 += work2
        out_X = work1.reshape((p, -1)).view(np.float64)
        lhs[:, 2:] = out_X

        # ======================================================================
        # Inverse Hessian products with respect to t
        # ======================================================================
        DPhiX_vec = self.DPhiX.view(np.float64).reshape((-1, 1))

        # t = z^2 Ht + <DS(u, X), (u, X)>
        out_t = self.z2 * self.At
        out_t += out_u * self.DPhiu[0, 0]
        out_t += (out_X @ DPhiX_vec).ravel()
        lhs[:, 0] = out_t

        # Multiply A (H A')
        return dense_dot_x(lhs, A.T)

    def third_dir_deriv_axpy(self, out, H, a=True):
        assert self.grad_updated
        if not self.hess_aux_updated:
            self.update_hessprod_aux()
        if not self.dder3_aux_updated:
            self.update_dder3_aux()

        (Ht, Hu, Hx) = H

        Hu2 = Hu * Hu
        trHx = np.trace(Hx).real

        chi = (Ht - self.DPhiu * Hu)[0, 0] - inp(self.DPhiX, Hx)
        chi2 = chi * chi

        UxHxUx = self.Ux.conj().T @ Hx @ self.Ux

        # Quantum entropy Hessians
        D2PhiuuH = self.trX * Hu * self.ui2
        D2PhiuXH = -np.trace(Hx).real * self.ui
        D2PhiXuH = -np.eye(self.n) * Hu * self.ui
        D2PhiXXH = self.Ux @ (self.D1x_log * UxHxUx) @ self.Ux.conj().T

        D2PhiuHH = inp(Hu, D2PhiuXH + D2PhiuuH)
        D2PhiXHH = inp(Hx, D2PhiXXH + D2PhiXuH)

        # Quantum entropy third order derivatives
        D3Phiuuu = -2 * Hu2 * self.trX * self.ui3
        D3PhiuXu = Hu * trHx * self.ui2
        D3PhiuuX = D3PhiuXu

        D3PhiXXX = scnd_frechet(self.D2x_log, UxHxUx, UxHxUx, self.Ux)
        D3PhiXuu = (Hu2 * self.ui2) * np.eye(self.n)

        # Third derivatives of barrier
        dder3_t = -2 * self.zi3 * chi2 - self.zi2 * (D2PhiXHH + D2PhiuHH)

        dder3_u = -dder3_t * self.DPhiu
        dder3_u -= 2 * self.zi2 * chi * (D2PhiuXH + D2PhiuuH)
        dder3_u += self.zi * (D3PhiuuX + D3PhiuXu + D3Phiuuu)
        dder3_u -= 2 * Hu2 * self.ui3

        dder3_X = -dder3_t * self.DPhiX
        dder3_X -= 2 * self.zi2 * chi * (D2PhiXXH + D2PhiXuH)
        dder3_X += self.zi * (D3PhiXXX + D3PhiXuu)
        dder3_X -= 2 * self.inv_X @ Hx @ self.inv_X @ Hx @ self.inv_X
        dder3_X = (dder3_X + dder3_X.conj().T) * 0.5

        out[0][:] += dder3_t * a
        out[1][:] += dder3_u * a
        out[2][:] += dder3_X * a

        return out

    # ==========================================================================
    # Auxilliary functions
    # ==========================================================================
    def congr_aux(self, A):
        assert not self.congr_aux_updated

        iscomplex = self.iscomplex

        # Get slices and views of A matrix to be used in congruence computations
        if sp.sparse.issparse(A):
            A = A.tocsr()
        self.Ax_vec = A[:, 2:]

        if sp.sparse.issparse(A):
            A = A.toarray()
        Ax_dense = np.ascontiguousarray(A[:, 2:])
        self.At = A[:, 0]
        self.Au = A[:, 1]
        self.Ax = np.array([vec_to_mat(Ax_k, iscomplex) for Ax_k in Ax_dense])

        # Preallocate matrices we will need when performing these congruences
        self.work1 = np.empty_like(self.Ax)
        self.work2 = np.empty_like(self.Ax)
        self.work3 = np.empty_like(self.Ax)

        self.congr_aux_updated = True

    def update_hessprod_aux(self):
        assert not self.hess_aux_updated
        assert self.grad_updated

        self.zi2 = self.zi * self.zi
        self.ui2 = self.ui * self.ui

        D1x_inv = np.reciprocal(np.outer(self.Dx, self.Dx))
        self.D1x_log = D1_log(self.Dx, self.log_Dx)
        self.D1x_comb = self.zi * self.D1x_log + D1x_inv

        self.hess_aux_updated = True

    def update_invhessprod_aux(self):
        assert not self.invhess_aux_updated
        assert self.grad_updated
        assert self.hess_aux_updated

        self.z2 = self.z * self.z
        self.uz = self.u * self.z
        self.uz2 = self.uz * self.uz
        self.D1x_comb_inv = np.reciprocal(self.D1x_comb)

        N_inv_I_rt2 = self.Ux * np.sqrt(np.diag(self.D1x_comb_inv))
        self.N_inv_I = N_inv_I_rt2 @ N_inv_I_rt2.conj().T
        self.tr_N_inv_I = np.trace(self.N_inv_I).real

        self.invhess_aux_updated = True

    def update_dder3_aux(self):
        assert not self.dder3_aux_updated
        assert self.hess_aux_updated

        self.zi3 = self.zi * self.zi2
        self.ui3 = self.ui * self.ui2
        self.D2x_log = D2_log(self.Dx, self.D1x_log)

        self.dder3_aux_updated = True