Source code for qics.cones.entropy.quantkeydist

# Copyright (c) 2024, Kerry He, James Saunderson, and Hamza Fawzi

# This Python package QICS is licensed under the MIT license; see LICENSE.md
# file in the root directory or at https://github.com/kerry-he/qics

import numpy as np
import scipy as sp

from qics._utils.gradient import D1_log, D2_log, scnd_frechet
from qics._utils.linalg import (
    cho_fact,
    cho_solve,
    congr_multi,
    dense_dot_x,
    inp,
    x_dot_dense,
)
from qics.cones.base import Cone
from qics.vectorize import get_full_to_compact_op, vec_to_mat


[docs] class QuantKeyDist(Cone): r"""A class representing a quantum key distribution cone .. math:: \mathcal{QKD}_{\mathcal{G},\mathcal{Z}} = \text{cl}\{ (t, X) \in \mathbb{R} \times \mathbb{H}^n_{++} : t \geq -S(\mathcal{G}(X)) + S(\mathcal{Z}(\mathcal{G}(X))) \}, where .. math:: S(X) = -\text{tr}[X \log(X)], is the quantum (von Neumann) entropy function, :math:`\mathcal{G}:\mathbb{H}^n\rightarrow\mathbb{H}^{mr}` is a positive linear map, and :math:`\mathcal{Z}:\mathbb{H}^{mr}\rightarrow\mathbb{H}^{mr}` is a pinching map that maps off-diagonal blocks to zero. Parameters ---------- G_info : :obj:`int` or :obj:`list` of :class:`~numpy.ndarray` Defines the linear map :math:`\mathcal{G}`. There are two ways to specify this linear map. - If ``G_info`` is an :obj:`int`, then :math:`\mathcal{G}` is the identity map, i.e., :math:`\mathcal{G}(X)=X`, and ``G_info`` specifies the dimension of :math:`X`. - If ``G_info`` is a :obj:`list` of :class:`~numpy.ndarray`, then ``G_info`` specifies the Kraus operators :math:`K_i \in \mathbb{C}^{mr \times n }` corresponding to :math:`\mathcal{G}` such that .. math:: \mathcal{G}(X) = \sum_{i} K_i X K_i^\dagger. Z_info : :obj:`int` or :obj:`tuple` or :obj:`list` of :class:`~numpy.ndarray` Defines the pinching map :math:`\mathcal{Z}`, which is of the form .. math:: \mathcal{Z}(Y) = \sum_{i} Z_i Y Z_i^\dagger. There are three ways to specify this linear map. - If ``Z_info`` is an :obj:`int`, then :math:`Z_i=|i \rangle\langle i| \otimes\mathbb{I}` for :math:`i=1,\ldots,r`, where ``r=Z_info``. - If ``Z_info`` is a :obj:`tuple` of the form ``(dims, sys)``, where ``dims=(n0, n1)`` is a :obj:`tuple` of :obj:`int` and ``sys`` is an :obj:`int`, then - :math:`Z_i=|i \rangle\langle i| \otimes\mathbb{I}_{n_1}` for :math:`i=1,\ldots,n_0` if ``sys=0``, and - :math:`Z_i=\mathbb{I}_{n_0}\otimes |i \rangle\langle i|` for :math:`i=1,\ldots,n_1` if ``sys=1``. We generalize this definition to the case where ``dims`` and ``sys`` are lists of any length. - If ``Z_info`` is a :obj:`list` of :class:`~numpy.ndarray`, then ``Z_info`` directly specifies the Kraus operators :math:`Z_i \in \mathbb{C}^{mr \times mr}`. Note that these Kraus operators must have a similar structure to those defined using the other options, i.e., must be diagonal matrices consisting of either ones or zeros, and :math:`Z_iZ_j=0` for all :math:`i\neq j`. iscomplex : :obj:`bool` Whether the matrix :math:`X` is defined over :math:`\mathbb{H}^n` (``True``), or restricted to :math:`\mathbb{S}^n` (``False``). The default is ``False``. See also -------- QuantRelEntr : Quantum relative entropy cone Notes ----- The quantum key distribution cone can also be modelled by the quantum relative entropy by noting the identity .. math:: S(\mathcal{G}(X) \| \mathcal{Z}(\mathcal{G}(X))) = -S(\mathcal{G}(X)) + S(\mathcal{Z}(\mathcal{G}(X))). However, the cone oracles for the quantum key distribution cone are more efficient than those for the quantum relative entropy cone (especially when :math:`\mathcal{G}` is the idenity map), so it is recommended to use the quantum key distribution cone where possible. """ def __init__(self, G_info, Z_info, iscomplex=False): self._process_G_info(G_info) self._process_Z_info(Z_info) self.iscomplex = iscomplex self.nu = 1 + self.n # Barrier parameter if iscomplex: self.vn = self.n * self.n self.vm = self.m * self.m self.dim = [1, 2 * self.n * self.n] self.type = ["r", "h"] self.dtype = np.complex128 else: self.vn = self.n * (self.n + 1) // 2 self.vm = self.m * (self.m + 1) // 2 self.dim = [1, self.n * self.n] self.type = ["r", "s"] self.dtype = np.float64 # Facial reduction on G(X) and Z(G(X)) ZK_list_raw = [[K[Z, :] for K in self.K_list_raw] for Z in self.Z_idxs] self.K_list_blk = [facial_reduction(self.K_list_raw)] self.ZK_list_blk = [facial_reduction(ZK) for ZK in ZK_list_raw] self.Nk = [K_list[0].shape[0] for K_list in self.K_list_blk] self.Nzk = [ZK_list[0].shape[0] for ZK_list in self.ZK_list_blk] # Update flags self.feas_updated = False self.grad_updated = False self.hess_aux_updated = False self.invhess_aux_updated = False self.dder3_aux_updated = False self.invhess_aux_aux_updated = False self.congr_aux_updated = False self.hess_congr_aux_updated = False self.invhess_congr_aux_updated = False if self.G_is_Id: self.precompute_computational_basis(self.m) self.F2C_op = get_full_to_compact_op(self.m, iscomplex) else: self.precompute_computational_basis(self.n) self.F2C_op = get_full_to_compact_op(self.n, iscomplex) return def _process_G_info(self, G_info): if isinstance(G_info, int): # Define G(X) as the identity map self.n = n = G_info # Input dimension of X self.N = N = G_info # Output dimension of G(X) self.K_list_raw = [np.eye(self.N)] self.G_is_Id = True else: # Define G(X) using given Kraus operators self.n = n = G_info[0].shape[1] # Input dimension of X self.N = N = G_info[0].shape[0] # Output dimension of G(X) assert all([Ki.shape == (N, n) for Ki in G_info]), "Kraus " \ "operators specified by G_info must have the same dimensions." # fmt: skip self.K_list_raw = G_info self.G_is_Id = len(G_info) == 1 and n == N \ and np.linalg.norm(G_info[0] - np.eye(n)) < 1e-10 # fmt: skip def _process_Z_info(self, Z_info): N = self.N if isinstance(Z_info, int): # Define block structure for Z(X) with r x r blocks of size m x m self.r = r = Z_info self.m = m = N // r assert m * r == N, "Number of blocks r specified by Z_info " \ "must be an integer factor of the dimension of G(X)." # fmt: skip self.Z_list_raw = [np.zeros((N, N)) for _ in range(r)] for k in range(r): range_k = np.arange(k * m, (k + 1) * m) self.Z_list_raw[k][range_k, range_k] = 1.0 self.Z_idxs = [np.arange(i * m, (i + 1) * m) for i in range(r)] elif isinstance(Z_info, tuple): # Define custom block structure for a given subsystem structure (dims, sys) = Z_info if isinstance(sys, int): sys = [sys] if isinstance(sys, tuple) or isinstance(sys, set): sys = list(sys) assert np.prod(dims) == N, "Total dimension of subsystems must equal " \ "the dimension of G(X)." # fmt: skip assert all([sys_k < len(dims) for sys_k in sys]), "Invalid subsystems " \ "specified, exceeds total number of dimensions provided." # fmt: skip self.r = np.prod([dims[k] for k in sys]) self.m = N // self.r idxs = np.meshgrid(*[range(dims[k]) for k in sys]) idxs = list(np.array(idxs).reshape(len(sys), -1).T) self.Z_list_raw = [] for i in range(len(idxs)): Z_i = np.array([1]) counter = 0 for k, dim_k in enumerate(dims): if k in sys: Z_ik = np.zeros(dim_k) Z_ik[idxs[i][counter]] = 1 Z_i = np.kron(Z_i, Z_ik) counter += 1 else: Z_i = np.kron(Z_i, np.ones(dim_k)) self.Z_list_raw += [np.diag(Z_i)] self.Z_idxs = [np.where(Z)[0] for Z in self.Z_list_raw] else: # Define Z(X) using given Kraus operators self.r = len(Z_info) self.m = self.N // self.r assert all([Zi.shape == (N, N) for Zi in Z_info]), "Kraus operators " \ "specified by Z_info must have the same dimensions." # fmt: skip assert not any([np.any(Zi - np.diag(np.diag(Zi))) for Zi in Z_info]), \ "Kraus operators specified by Z_info must be diagonal." # fmt: skip assert all([all([x in {0, 1} for x in np.diag(Zi)]) for Zi in Z_info]), \ "Diagonal elements of Kraus operators specified by Z_info must only " \ "be either 0s or 1s." # fmt: skip assert np.all(sum(Z_info) == np.eye(N)), "Kraus operators specified by " \ "Z_info must satisfy ZiZj = 0 for all i =/= j." # fmt: skip self.Z_list_raw = Z_info self.Z_idxs = [np.where(Z)[0] for Z in Z_info] def get_iscomplex(self): return self.iscomplex def get_init_point(self, out): from qics.quantum import entropy eye = np.eye(self.n) GI_blk = [apply_kraus(eye, K_list) for K_list in self.K_list_blk] ZGI_blk = [apply_kraus(eye, ZK_list) for ZK_list in self.ZK_list_blk] entr_GI = sum([entropy(KK) for KK in GI_blk]) entr_ZGI = sum([entropy(ZKKZ) for ZKKZ in ZGI_blk]) f0 = -entr_GI + entr_ZGI t0 = f0 / 2 + np.sqrt(1 + f0 * f0 / 4) point = [np.array([[t0]]), np.eye(self.n, dtype=self.dtype)] self.set_point(point, point) out[0][:] = point[0] out[1][:] = point[1] return out def get_feas(self): if self.feas_updated: return self.feas self.feas_updated = True (self.t, self.X) = (t, X) = self.primal # Check that X is positive definite self.Dx, self.Ux = np.linalg.eigh(X) if any(self.Dx <= 0): self.feas = False return self.feas # Check that G(X) is positive definite # Note that G(X) should be positive definite if X is, but check # just to be sure that we can safely take logarithms of G(X). GX_blk = [apply_kraus(X, K_list) for K_list in self.K_list_blk] DUgx_blk = [np.linalg.eigh(GX) for GX in GX_blk] self.Dgx_blk = Dgx_blk = [DUgx[0] for DUgx in DUgx_blk] self.Ugx_blk = [DUgx[1] for DUgx in DUgx_blk] if any([any(Dgx <= 0) for Dgx in self.Dgx_blk]): self.feas = False return self.feas # Check that Z(G(X)) is positive definite # Note that Z(G(X)) should be positive definite if X is, but check # just to be sure that we can safely take logarithms of Z(G(X)). ZGX_blk = [apply_kraus(X, ZG_list) for ZG_list in self.ZK_list_blk] DUzgx_blk = [np.linalg.eigh(ZGX) for ZGX in ZGX_blk] self.Dzgx_blk = Dzgx_blk = [DUzkx[0] for DUzkx in DUzgx_blk] self.Uzgx_blk = [DUzkx[1] for DUzkx in DUzgx_blk] if any([any(Dzkx <= 0) for Dzkx in self.Dzgx_blk]): self.feas = False return self.feas # Check that t > -S(G(X)) + S(Z(G(X))) self.log_Dgx_blk = log_Dgx_blk = [np.log(D) for D in self.Dgx_blk] self.log_Dzgx_blk = log_Dzgx_blk = [np.log(D) for D in self.Dzgx_blk] entr_GX = [inp(D, log_D) for (D, log_D) in zip(Dgx_blk, log_Dgx_blk)] entr_ZGX = [inp(D, log_D) for (D, log_D) in zip(Dzgx_blk, log_Dzgx_blk)] self.z = t[0, 0] - (sum(entr_GX) - sum(entr_ZGX)) self.feas = self.z > 0 return self.feas def get_val(self): assert self.feas_updated return -np.log(self.z) - np.sum(np.log(self.Dx)) def update_grad(self): assert self.feas_updated assert not self.grad_updated # Compute gradients of quantum relative entropy # DPhi(X) = G'[log(G(X)) + I] - G'Z'[log(G(Z(X))) + I] log_GX_blk = [ (U * logD) @ U.conj().T + np.eye(n) for (U, logD, n) in zip(self.Ugx_blk, self.log_Dgx_blk, self.Nk) ] log_ZGX_blk = [ (U * logD) @ U.conj().T + np.eye(n) for (U, logD, n) in zip(self.Uzgx_blk, self.log_Dzgx_blk, self.Nzk) ] G_log_GX = sum( [ apply_kraus(logX, K_list, adjoint=True) for (logX, K_list) in zip(log_GX_blk, self.K_list_blk) ] ) ZG_log_ZGX = sum( [ apply_kraus(logX, ZK_list, adjoint=True) for (logX, ZK_list) in zip(log_ZGX_blk, self.ZK_list_blk) ] ) self.DPhi = G_log_GX - ZG_log_ZGX # Compute X^-1 inv_Dx = np.reciprocal(self.Dx) inv_X_rt2 = self.Ux * np.sqrt(inv_Dx) self.inv_X = inv_X_rt2 @ inv_X_rt2.conj().T # Compute gradient of barrier function self.zi = np.reciprocal(self.z) self.grad = [-self.zi, self.zi * self.DPhi - self.inv_X] self.grad_updated = True def update_hessprod_aux(self): assert not self.hess_aux_updated assert self.grad_updated self.zi2 = self.zi * self.zi self.D1gx_log_blk = [ D1_log(D, log_D) for (D, log_D) in zip(self.Dgx_blk, self.log_Dgx_blk) ] self.D1zgx_log_blk = [ D1_log(D, log_D) for (D, log_D) in zip(self.Dzgx_blk, self.log_Dzgx_blk) ] if self.G_is_Id: D1x_inv = np.reciprocal(np.outer(self.Dx, self.Dx)) self.D1x_comb = self.zi * self.D1gx_log_blk[0] + D1x_inv self.hess_aux_updated = True return def hess_prod_ip(self, out, H): assert self.grad_updated if not self.hess_aux_updated: self.update_hessprod_aux() (Ht, Hx) = H K_list_blk, ZK_list_blk = self.K_list_blk, self.ZK_list_blk Ugx_blk, Uzgx_blk = self.Ugx_blk, self.Uzgx_blk D1gx_log_blk, D1zgx_log_blk = self.D1gx_log_blk, self.D1zgx_log_blk GH_blk = [apply_kraus(Hx, K_list) for K_list in K_list_blk] ZGH_blk = [apply_kraus(Hx, ZK_list) for ZK_list in ZK_list_blk] UGHU_blk = [U.conj().T @ H @ U for (H, U) in zip(GH_blk, Ugx_blk)] UZGHU_blk = [U.conj().T @ H @ U for (H, U) in zip(ZGH_blk, Uzgx_blk)] # Hessian products for quantum key distribution # D2Phi(X)[H] # = G'(Ugx [log^[1](Dgx) .* (Ugx' G(H) Ugx)] Ugx') # - G'(Z'(Uzgx [log^[1](Dzgx) .* (Uzgx' Z(G(H)) Uzgx)] Uzgx')) D2PhiH = np.zeros_like(self.X) # First term, i.e., G'(Ugx [log^[1](Dgx) .* (Ugx' G(H) Ugx)] Ugx') for k, (U_k, D1_k) in enumerate(zip(Ugx_blk, D1gx_log_blk)): temp = U_k @ (D1_k * UGHU_blk[k]) @ U_k.conj().T D2PhiH += apply_kraus(temp, K_list_blk[k], adjoint=True) # Second term, i.e., -G'(Z'(Uzgx [ ... ] Uzgx')) for k, (U_k, D1_k) in enumerate(zip(Uzgx_blk, D1zgx_log_blk)): temp = U_k @ (D1_k * UZGHU_blk[k]) @ U_k.conj().T D2PhiH -= apply_kraus(temp, ZK_list_blk[k], adjoint=True) # ====================================================================== # Hessian products with respect to t # ====================================================================== # D2_t F(t, X)[Ht, Hx] = (Ht - D_X Phi(X)[Hx]) / z^2 out[0][:] = (Ht - inp(self.DPhi, Hx)) * self.zi2 # ====================================================================== # Hessian products with respect to X # ====================================================================== # D2_X F(t, X)[Ht, Hx] = -D2_t F(t, X)[Ht, Hx] * DPhi(X) # + D2Phi(X)[Hx] / z + X^-1 Hx X^-1 out_X = -out[0] * self.DPhi out_X += self.zi * D2PhiH out_X += self.inv_X @ Hx @ self.inv_X out[1][:] = (out_X + out_X.conj().T) * 0.5 return out def hess_congr(self, A): assert self.grad_updated if not self.hess_aux_updated: self.update_hessprod_aux() if not self.congr_aux_updated: self.congr_aux(A) if not self.hess_congr_aux_updated: self.update_hess_congr_aux(A) p = A.shape[0] lhs = np.zeros((p, sum(self.dim))) Z_idxs = self.Z_idxs K_list_blk, ZK_list_blk = self.K_list_blk, self.ZK_list_blk Ugx_blk, Uzgx_blk = self.Ugx_blk, self.Uzgx_blk D1gx_log_blk, D1zgx_log_blk = self.D1gx_log_blk, self.D1zgx_log_blk Work0, Work1 = self.Work0, self.Work1 Work2, Work3 = self.Work2, self.Work3 # ====================================================================== # Hessian products with respect to t # ====================================================================== # D2_t F(t, X)[Ht, Hx] = (Ht - D_X Phi(X)[Hx]) / z^2 Ax_vec = self.Ax.view(np.float64).reshape((p, 1, -1)) DPhi_vec = self.DPhi.view(np.float64).reshape((-1, 1)) outt = self.At - (Ax_vec @ DPhi_vec).ravel() outt *= self.zi2 lhs[:, 0] = outt # ====================================================================== # Hessian products with respect to X # ====================================================================== # Hessian products for quantum key distribution # D2Phi(X)[H] # = G'(Ugx [log^[1](Dgx) .* (Ugx' G(H) Ugx)] Ugx') # - G'(Z'(Uzgx [log^[1](Dzgx) .* (Uzgx' Z(G(H)) Uzgx)] Uzgx')) if self.G_is_Id: work1, work2, work3 = self.work1, self.work2, self.work2 # If G(X) = X, then we can do some more efficient slicing operations # Compute second term, i.e., -Z'(Uzx [log^[1](Dzx) .* ... ] Uzx') Work0 *= 0 for Z_idxs_k, U_k, D1_log_k in zip(Z_idxs, Uzgx_blk, D1zgx_log_blk): # Apply Z(Ax), i.e., extract k-th submatrix from Ax temp = self.Ax[np.ix_(np.arange(p), Z_idxs_k, Z_idxs_k)] # Compute Uzx [log^[1](Dzx) .* (Uzgx' Z(Ax) Uzx)] Uzx' congr_multi(work2, U_k.conj().T, temp, work3) work2 *= D1_log_k * self.zi congr_multi(work1, U_k, work2, work3) # Apply Z'( ... ), i.e., place result in k-th submatrix Work0[np.ix_(np.arange(p), Z_idxs_k, Z_idxs_k)] = work1 # Compute first term, i.e., Ux [log^[1](Dx) .* (Ux' G(H) Ux)] Ux' # Also combine this with 1/z ( ... ) + X^-1 Ax X^-1 congr_multi(Work2, self.Ux.conj().T, self.Ax, Work3) self.Work2 *= self.D1x_comb congr_multi(Work1, self.Ux, Work2, Work3) # Subtract two terms to get D2Phi(X)[H] Work1 -= Work0 else: Work1 *= 0 # Get first term, i.e., G'(Ugx [log^[1](Dgx) .* ... ] Ugx') for k in range(len(K_list_blk)): worka, workb = self.Work4[k], self.Work4b[k] workc, workd = self.Work5[k], self.Work5b[k] workc *= 0 KU_list = [K.conj().T @ Ugx_blk[k] for K in K_list_blk[k]] # Apply Ugx' G(Ax) Ugx for KU in KU_list: congr_multi(workd, KU.conj().T, self.Ax, work=workb) workc += workd # Apply log^[1](Dgx) .* ( ... ) workc *= D1gx_log_blk[k] * self.zi # Apply G'(Ugx [ ... ] Ugx') for KU in KU_list: congr_multi(Work0, KU, workc, work=worka) Work1 += Work0 # Get second term, i.e., G'(Z'(Uzgx [log^[1](Dzgx) .* ... ] Uzgx')) for k in range(len(ZK_list_blk)): worka, workb = self.Work6[k], self.Work6b[k] workc, workd = self.Work7[k], self.Work7b[k] workc *= 0 KU_list = [K.conj().T @ Uzgx_blk[k] for K in ZK_list_blk[k]] # Apply Uzgx' Z(G(H)) Uzgx for KU in KU_list: congr_multi(workd, KU.conj().T, self.Ax, work=workb) workc += workd # Apply log^[1](Dzgx) .* ( ... ) workc *= D1zgx_log_blk[k] * self.zi # Apply G'(Z'(Uzgx [ ... ] Uzgx')) for KU in KU_list: congr_multi(Work0, KU, workc, work=worka) Work1 -= Work0 # Compute X^-1 Ax X^-1 congr_multi(Work0, self.inv_X, self.Ax, Work2) Work1 += Work0 # Hessian product of barrier function # D2_X F(t, X)[Ht, Hx] = -D2_t F(t, X)[Ht, Hx] * DPhi(X) # + D2Phi(X)[Hx] / z + X^-1 Hx X^-1 np.outer(outt, self.DPhi, out=Work0.reshape(p, -1)) Work1 -= Work0 lhs[:, 1:] = Work1.reshape((p, -1)).view(np.float64) # Multiply A (H A') return dense_dot_x(lhs, A.T) def invhess_prod_ip(self, out, H): assert self.grad_updated if not self.hess_aux_updated: self.update_hessprod_aux() if not self.invhess_aux_updated: self.update_invhessprod_aux() (Ht, Hx) = H Wx = Hx + Ht * self.DPhi # ====================================================================== # Inverse Hessian products with respect to X # ====================================================================== # Compute X = M \ Wx, where we solve this using two different strategies if self.G_is_Id: # If G(X) = X, then solve M using the matrix inversion lemma work = np.zeros((self.r * self.vm)) # Apply D2S(X)^-1 = (Ux⊗Ux) (1/z log + inv)^[1](Dx)^-1 (Ux'⊗Ux') temp = self.Ux.conj().T @ Wx @ self.Ux out_X = self.Ux @ (self.D1x_comb_inv * temp) @ self.Ux.conj().T # Apply Z( ... ), i.e., extract k-th submatrix then get compact # vectorization of this submatrix for k, Z_idxs_k in enumerate(self.Z_idxs): temp_k = out_X[np.ix_(Z_idxs_k, Z_idxs_k)] temp_vec = temp_k.view(np.float64).reshape(-1) work[k * self.vm : (k + 1) * self.vm] = self.F2C_op @ temp_vec work *= -1 # Solve linear system N \ ( ... ) work = cho_solve(self.schur_fact, work) # Apply Z'( ... ), i.e., recover submatrix from compact # vectorization then place in k-th submatrix Work3 = np.zeros((self.n, self.n), dtype=self.dtype) for k, Z_idxs_k in enumerate(self.Z_idxs): work_k = work[k * self.vm : (k + 1) * self.vm] work_k = self.F2C_op.T @ work_k work_k = work_k.view(self.dtype).reshape((self.m, self.m)) Work3[np.ix_(Z_idxs_k, Z_idxs_k)] = work_k # Apply D2S(X)^-1 = (Ux ⊗ Ux) log^[1](Dx) (Ux' ⊗ Ux') and subtract # from (D2S(X)^-1 Wx) to get X temp = self.Ux.conj().T @ Work3 @ self.Ux out_X -= self.Ux @ (self.D1x_comb_inv * temp) @ self.Ux.conj().T else: # Otherwise, directly build M and Cholesky factor to solve M \ Wx # Convert matrices to compact real vectors temp_vec = Wx.view(np.float64).reshape((-1, 1)) temp_vec = self.F2C_op @ temp_vec # Solve system temp_vec = cho_solve(self.hess_fact, temp_vec) # Expand compact real vectors back into full matrices temp_vec = self.F2C_op.T @ temp_vec out_X = temp_vec.T.view(self.dtype).reshape((self.n, self.n)) out[1][:] = (out_X + out_X.conj().T) * 0.5 # ====================================================================== # Inverse Hessian products with respect to t # ====================================================================== # Compute t = z^2 Ht + <DPhi(X), X> out[0][:] = self.z2 * Ht + inp(self.DPhi, out[1]) return out def invhess_congr(self, A): assert self.grad_updated if not self.hess_aux_updated: self.update_hessprod_aux() if not self.invhess_aux_updated: self.update_invhessprod_aux() if not self.congr_aux_updated: self.congr_aux(A) if not self.invhess_congr_aux_updated: self.update_invhess_congr_aux(A) # The inverse Hessian product applied on (Ht, Hx, Hy) for the OPT # barrier is # X = M \ Wx # t = z^2 Ht + <DPhi(X), X> # where Wx = Hx + Ht DPhi(X) and M = 1/z D2Phi + X^1 ⊗ X^-1 if self.G_is_Id: # When G(X) = X, we can write the expression for M as # M = 1/z D2S(X) - 1/z Z' D2S(Z(X)) Z + X^-1 ⊗ X^-1 # = (Ux ⊗ Ux) (1/z log + inv)^[1](Dx) (Ux' ⊗ Ux') # - 1/z Z' (Uzx ⊗ Uzx) log^[1](Dzx) (Uzx' ⊗ Uzx') Z # Treating [Z' D2S(Z(X)) Z] as a low-rank perturbation of D2S(X), we # can solve linear systems with M by using the matrix inversion # lemma # X = [D2S(X)^-1 - D2S(X)^-1 Z' N^-1 Z D2S(X)^-1] Wx # where # N = 1/z (Uzx ⊗ Uzx) [log^[1](Dzx)]^-1 (Uzx' ⊗ Uzx') # - Z (Ux ⊗ Ux) [(1/z log + inv)^[1](Dx)]^-1 (Ux' ⊗ Ux') Z' p = A.shape[0] lhs = np.zeros((p, sum(self.dim))) work = np.zeros((self.r * self.vm, p)) Work0, Work1 = self.Work0, self.Work1 Work2, Work3 = self.Work2, self.Work3 # ================================================================== # Inverse Hessian products with respect to X # ================================================================== # Compute Wx = Hx + Ht DPhi(X) np.outer(self.At, self.DPhi, out=Work2.reshape((p, -1))) np.add(self.Ax, Work2, out=Work0) # Apply D2S(X)^-1 = (Ux⊗Ux) (1/z log + inv)^[1](Dx)^-1 (Ux'⊗Ux') congr_multi(Work2, self.Ux.conj().T, Work0, Work3) Work2 *= self.D1x_comb_inv congr_multi(Work0, self.Ux, Work2, Work3) # Apply Z( ... ), i.e., extract k-th submatrix then get compact # vectorization of this submatrix for k, Z_idxs_k in enumerate(self.Z_idxs): temp = Work0[np.ix_(np.arange(p), Z_idxs_k, Z_idxs_k)] temp = temp.view(np.float64).reshape((p, -1)).T temp = x_dot_dense(self.F2C_op, temp) work[k * self.vm : (k + 1) * self.vm] = temp work *= -1 # Solve linear system N \ ( ... ) work = cho_solve(self.schur_fact, work) # Apply Z'( ... ), i.e., recover submatrix from compact # vectorization then place in k-th submatrix Work1.fill(0.0) for k, Z_idxs_k in enumerate(self.Z_idxs): work_k = work[k * self.vm : (k + 1) * self.vm] work_k = x_dot_dense(self.F2C_op.T, work_k) work_k = work_k.T.view(self.dtype).reshape(p, self.m, self.m) Work1[np.ix_(np.arange(p), Z_idxs_k, Z_idxs_k)] = work_k # Apply D2S(X)^-1 = (Ux⊗Ux) (1/z log + inv)^[1](Dx)^-1 (Ux'⊗Ux') congr_multi(Work2, self.Ux.conj().T, Work1, Work3) Work2 *= self.D1x_comb_inv congr_multi(Work1, self.Ux, Work2, Work3) # Subtract previous expression from (D2S(X)^-1 Wx) to get X Work0 -= Work1 out_X = Work0.reshape((p, -1)).view(np.float64) lhs[:, 1:] = out_X # ================================================================== # Inverse Hessian products with respect to t # ================================================================== # Compute t = z^2 Ht + <DPhi(X), X> DPhi_vec = self.DPhi.view(np.float64).reshape((-1, 1)) out_t = self.z2 * self.At out_t += (out_X @ DPhi_vec).ravel() lhs[:, 0] = out_t # Multiply A (H A') return dense_dot_x(lhs, A.T) else: # Otherwise, we will directly build M and Cholesky factor it to # solve X = M \ Wx # ================================================================== # Inverse Hessian products with respect to X # ================================================================== # Compute Wx = Hx + Ht DPhi(X) np.outer(self.At, self.DPhi_cvec, out=self.work0) self.work0 += self.Ax_cvec # Solve system X = M \ Wx out_X = cho_solve(self.hess_fact, self.work0.T) # ================================================================== # Inverse Hessian products with respect to t # ================================================================== # Compute t = z^2 Ht + <DPhi(X), X> out_t = self.z2 * self.At out_t += (out_X.T @ self.DPhi_cvec).ravel() # Multiply A (H A') out = np.outer(out_t, self.At) out += x_dot_dense(self.Ax_cvec, out_X) return out def third_dir_deriv_axpy(self, out, H, a=True): assert self.grad_updated if not self.hess_aux_updated: self.update_hessprod_aux() if not self.dder3_aux_updated: self.update_dder3_aux() (Ht, Hx) = H K_list_blk, ZK_list_blk = self.K_list_blk, self.ZK_list_blk Ugx_blk, Uzgx_blk = self.Ugx_blk, self.Uzgx_blk D1gx_log_blk, D1zgx_log_blk = self.D1gx_log_blk, self.D1zgx_log_blk D2gx_log_blk, D2zgx_log_blk = self.D2gx_log_blk, self.D2zgx_log_blk GH_blk = [apply_kraus(Hx, K_list) for K_list in K_list_blk] ZGH_blk = [apply_kraus(Hx, ZK_list) for ZK_list in ZK_list_blk] UGHU_blk = [U.conj().T @ H @ U for (H, U) in zip(GH_blk, Ugx_blk)] UZGHU_blk = [U.conj().T @ H @ U for (H, U) in zip(ZGH_blk, Uzgx_blk)] # Quantum key distribution Hessians D2PhiH = np.zeros_like(self.X) # Hessians of S(G(X)) for k, (U_k, D1_k) in enumerate(zip(Ugx_blk, D1gx_log_blk)): temp = U_k @ (D1_k * UGHU_blk[k]) @ U_k.conj().T D2PhiH += apply_kraus(temp, K_list_blk[k], adjoint=True) # Hessians of S(Z(G(X))) for k, (U_k, D1_k) in enumerate(zip(Uzgx_blk, D1zgx_log_blk)): temp = U_k @ (D1_k * UZGHU_blk[k]) @ U_k.conj().T D2PhiH -= apply_kraus(temp, ZK_list_blk[k], adjoint=True) # Quantum key distribution third order derivatives D3PhiHH = np.zeros_like(self.X) # Third order derivatives of S(G(X)) for k, (U_k, D2_k) in enumerate(zip(Ugx_blk, D2gx_log_blk)): temp = scnd_frechet(D2_k * UGHU_blk[k], UGHU_blk[k], U=U_k) D3PhiHH += apply_kraus(temp, K_list_blk[k], adjoint=True) # Third order derivatives of S(Z(G(X))) for k, (U_k, D2_k) in enumerate(zip(Uzgx_blk, D2zgx_log_blk)): temp = scnd_frechet(D2_k * UZGHU_blk[k], UZGHU_blk[k], U=U_k) D3PhiHH -= apply_kraus(temp, ZK_list_blk[k], adjoint=True) # Third derivative of barrier DPhiH = inp(self.DPhi, Hx) D2PhiHH = inp(D2PhiH, Hx) chi = Ht - DPhiH chi2 = chi * chi dder3_t = -2 * self.zi3 * chi2 - self.zi2 * D2PhiHH dder3_X = -dder3_t * self.DPhi dder3_X -= 2 * self.zi2 * chi * D2PhiH dder3_X += self.zi * D3PhiHH dder3_X -= 2 * self.inv_X @ Hx @ self.inv_X @ Hx @ self.inv_X dder3_X = (dder3_X + dder3_X.conj().T) * 0.5 out[0][:] += dder3_t * a out[1][:] += dder3_X * a return out # ======================================================================== # Auxilliary functions # ======================================================================== def congr_aux(self, A): assert not self.congr_aux_updated iscomplex = self.iscomplex # Get slices and views of A matrix to be used in congruence computations if sp.sparse.issparse(A): A = A.tocsr() self.Ax_vec = A[:, 1:] if sp.sparse.issparse(A): A = A.toarray() Ax_dense = np.ascontiguousarray(A[:, 1:]) self.At = A[:, 0] self.Ax = np.array([vec_to_mat(Ax_k, iscomplex) for Ax_k in Ax_dense]) self.congr_aux_updated = True def update_hess_congr_aux(self, A): assert not self.hess_congr_aux_updated p = A.shape[0] self.Work0 = np.zeros_like(self.Ax, dtype=self.dtype) self.Work1 = np.zeros_like(self.Ax, dtype=self.dtype) self.Work2 = np.zeros_like(self.Ax, dtype=self.dtype) self.Work3 = np.zeros_like(self.Ax, dtype=self.dtype) if self.G_is_Id: self.work1 = np.empty((p, self.m, self.m), dtype=self.dtype) self.work2 = np.empty((p, self.m, self.m), dtype=self.dtype) self.work3 = np.empty((p, self.m, self.m), dtype=self.dtype) else: n, Nk, Nzk, dtype = self.n, self.Nk, self.Nzk, self.dtype self.Work4 = [np.zeros((p, n, nk), dtype=dtype) for nk in Nk] self.Work4b = [np.zeros((p, nk, n), dtype=dtype) for nk in Nk] self.Work5 = [np.zeros((p, nk, nk), dtype=dtype) for nk in Nk] self.Work5b = [np.zeros((p, nk, nk), dtype=dtype) for nk in Nk] self.Work6 = [np.zeros((p, n, nzk), dtype=dtype) for nzk in Nzk] self.Work6b = [np.zeros((p, nzk, n), dtype=dtype) for nzk in Nzk] self.Work7 = [np.zeros((p, nzk, nzk), dtype=dtype) for nzk in Nzk] self.Work7b = [np.zeros((p, nzk, nzk), dtype=dtype) for nzk in Nzk] self.hess_congr_aux_updated = True def update_invhess_congr_aux(self, A): assert not self.invhess_congr_aux_updated if self.G_is_Id: self.Work0 = np.zeros_like(self.Ax, dtype=self.dtype) self.Work1 = np.zeros_like(self.Ax, dtype=self.dtype) self.Work2 = np.zeros_like(self.Ax, dtype=self.dtype) self.Work3 = np.zeros_like(self.Ax, dtype=self.dtype) else: if sp.sparse.issparse(A): A = A.tocsr() self.Ax_cvec = (self.F2C_op @ A[:, 1:].T).T if sp.sparse.issparse(A): self.Ax_cvec = self.Ax_cvec.tocoo() self.work0 = np.zeros(self.Ax_cvec.shape) self.work1 = np.zeros(self.Ax_cvec.shape) self.invhess_congr_aux_updated = True def update_invhessprod_aux_aux(self): assert not self.invhess_aux_aux_updated if self.G_is_Id: self.work6 = np.zeros((self.vm, self.m, self.m), dtype=self.dtype) self.work7 = np.zeros((self.vm, self.m, self.m), dtype=self.dtype) self.work8 = np.zeros((self.vm, self.m, self.m), dtype=self.dtype) # Computational basis for symmetric/Hermitian matrices rt2 = np.sqrt(0.5) n, r, m, vm = self.n, self.r, self.m, self.vm self.E_blk = np.zeros((r, vm, n, n), dtype=self.dtype) for b, Z_idxs_k in enumerate(self.Z_idxs): k = 0 for j_subblk in range(m): j = Z_idxs_k[j_subblk] for i_subblk in range(j_subblk): i = Z_idxs_k[i_subblk] self.E_blk[b, k, i, j] = rt2 self.E_blk[b, k, j, i] = rt2 k += 1 if self.iscomplex: self.E_blk[b, k, i, j] = rt2 * 1j self.E_blk[b, k, j, i] = rt2 * -1j k += 1 self.E_blk[b, k, j, j] = 1.0 k += 1 self.E_blk = self.E_blk.reshape((-1, self.n, self.n)) self.Work6 = np.zeros((r * vm, n, n), dtype=self.dtype) self.Work7 = np.zeros((r * vm, n, n), dtype=self.dtype) self.Work8 = np.zeros((r * vm, n, n), dtype=self.dtype) else: dtype = self.dtype n, vn, Nk, Nzk = self.n, self.vn, self.Nk, self.Nzk self.work2 = [np.zeros((vn, n, nk), dtype=dtype) for nk in Nk] self.work2b = [np.zeros((vn, nk, n), dtype=dtype) for nk in Nk] self.work3 = [np.zeros((vn, nk, nk), dtype=dtype) for nk in Nk] self.work3b = [np.zeros((vn, nk, nk), dtype=dtype) for nk in Nk] self.work4 = [np.zeros((vn, n, nzk), dtype=dtype) for nzk in Nzk] self.work4b = [np.zeros((vn, nzk, n), dtype=dtype) for nzk in Nzk] self.work5 = [np.zeros((vn, nzk, nzk), dtype=dtype) for nzk in Nzk] self.work5b = [np.zeros((vn, nzk, nzk), dtype=dtype) for nzk in Nzk] self.work6 = np.zeros((vn, n, n), dtype=dtype) self.work7 = np.zeros((vn, n, n), dtype=dtype) self.work8 = np.zeros((vn, n, n), dtype=dtype) self.invhess_aux_aux_updated = True def update_invhessprod_aux(self): assert not self.invhess_aux_updated assert self.grad_updated assert self.hess_aux_updated if not self.invhess_aux_aux_updated: self.update_invhessprod_aux_aux() self.z2 = self.z * self.z if self.G_is_Id: # Precompute and factorize the matrix # N = z (Uzx ⊗ Uzx) [log^[1](Dzx)]^-1 (Uzx' ⊗ Uzx') # - Z (Ux ⊗ Ux) [(1/z log + inv)^[1](Dx)]^-1 (Ux' ⊗ Ux') Z' # which we will need to solve linear systems with the Hessian of # our barrier function self.D1x_comb_inv = np.reciprocal(self.D1x_comb) r, vm = self.r, self.vm Uzgx_blk, D1zgx_log_blk = self.Uzgx_blk, self.D1zgx_log_blk work6, work7, work8 = self.work6, self.work7, self.work8 Work6, Work7, Work8 = self.Work6, self.Work7, self.Work8 self.schur = np.zeros((r * vm, r * vm)) # ================================================================== # Get first term, i.e., 1/z (Uzx ⊗ Uzx) ... (Uzx' ⊗ Uzx') # ================================================================== for k, (U, D1_log) in enumerate(zip(Uzgx_blk, D1zgx_log_blk)): # Begin with (Uzx' ⊗ Uzx') congr_multi(work8, U.conj().T, self.E, work=work7) # Apply z [log^[1](Dzx)]^-1 work8 *= self.z * np.reciprocal(D1_log) # Apply (Uzx ⊗ Uzx) congr_multi(work6, U, work8, work=work7) work = work6.view(np.float64).reshape((vm, -1)) work = x_dot_dense(self.F2C_op, work.T) self.schur[k * vm : (k + 1) * vm, k * vm : (k + 1) * vm] = work # ================================================================== # Get second term, i.e., Z (Ux ⊗ Ux) ... (Ux' ⊗ Ux') Z' # ================================================================== # Begin with (Ux' ⊗ Ux') Z' congr_multi(Work8, self.Ux.conj().T, self.E_blk, work=Work7) # Apply [(1/z log + inv)^[1](Dx)]^-1 Work8 *= self.D1x_comb_inv # Apply (Ux ⊗ Ux) congr_multi(Work6, self.Ux, Work8, work=Work7) # Apply Z, i.e., extract submatrices for k, Z_idxs_k in enumerate(self.Z_idxs): temp = Work6[np.ix_(np.arange(r * vm), Z_idxs_k, Z_idxs_k)] temp = temp.view(np.float64).reshape((r * vm, -1)) temp = x_dot_dense(self.F2C_op, temp.T).T self.schur[:, k * vm : (k + 1) * vm] -= temp # Subtract terms to obtain N then Cholesky factor self.schur_fact = cho_fact(self.schur) else: # Precompute and factorize the matrix # M = 1/z G' (Ugx ⊗ Ugx) log^[1](Dgx) (Ugx' ⊗ Ugx') G # - 1/z G'Z' (Uzgx ⊗ Uzgx) log^[1](Dzgx) (Uzgx' ⊗ Uzgx') ZG # + X^-1 ⊗ X^-1 # which we will need to solve linear systems with the Hessian of # our barrier function K_list_blk, ZK_list_blk = self.K_list_blk, self.ZK_list_blk Ugx_blk, Uzgx_blk = self.Ugx_blk, self.Uzgx_blk D1gx_log_blk, D1zgx_log_blk = self.D1gx_log_blk, self.D1zgx_log_blk work7, work8 = self.work7, self.work8 DPhi_vec = self.DPhi.view(np.float64).reshape(-1, 1) self.DPhi_cvec = self.F2C_op @ DPhi_vec # ================================================================== # Get third term, i.e., X^-1 ⊗ X^-1 # ================================================================== congr_multi(work8, self.inv_X, self.E, work=work7) # ================================================================== # Get first term, i.e., 1/z G' (Ugx ⊗ Ugx) ... (Ugx' ⊗ Ugx') G # ================================================================== for k in range(len(K_list_blk)): worka, workb = self.work2[k], self.work2b[k] workc, workd = self.work3[k], self.work3b[k] workc *= 0 KU_list = [K.conj().T @ Ugx_blk[k] for K in K_list_blk[k]] # Apply Ugx' G(Ax) Ugx for KU in KU_list: congr_multi(workd, KU.conj().T, self.E, work=workb) workc += workd # Apply log^[1](Dgx) .* ( ... ) workc *= D1gx_log_blk[k] * self.zi # Apply G'(Ugx [ ... ] Ugx') for KU in KU_list: congr_multi(work7, KU, workc, work=worka) work8 += work7 # ================================================================== # Get second term, i.e., 1/z G'Z' [ ... ] ZG # ================================================================== for k in range(len(ZK_list_blk)): worka, workb = self.work4[k], self.work4b[k] workc, workd = self.work5[k], self.work5b[k] workc *= 0 KU_list = [K.conj().T @ Uzgx_blk[k] for K in ZK_list_blk[k]] # Apply Uzgx' Z(G(Ax)) Uzgx for KU in KU_list: congr_multi(workd, KU.conj().T, self.E, work=workb) workc += workd # Apply log^[1](Dzgx) .* ( ... ) workc *= D1zgx_log_blk[k] * self.zi # Apply G'Z'((Uzgx [ ... ] Uzgx')) for KU in KU_list: congr_multi(work7, KU, workc, work=worka) work8 -= work7 # Get Hessian and factorize work = work8.view(np.float64).reshape((self.vn, -1)) self.hess = x_dot_dense(self.F2C_op, work.T) self.hess = (self.hess + self.hess.T) * 0.5 self.hess_fact = cho_fact(self.hess) self.invhess_aux_updated = True def update_dder3_aux(self): assert not self.dder3_aux_updated assert self.hess_aux_updated self.zi3 = self.zi2 * self.zi self.D2gx_log_blk = [ D2_log(D, D1) for (D, D1) in zip(self.Dgx_blk, self.D1gx_log_blk) ] self.D2zgx_log_blk = [ D2_log(D, D1) for (D, D1) in zip(self.Dzgx_blk, self.D1zgx_log_blk) ] self.dder3_aux_updated = True return
def facial_reduction(K_list): # For a set of Kraus operators i.e., Σ_i K_i @ X @ K_i.T, returns a set of # Kraus operators which preserves positive definiteness nk = K_list[0].shape[0] # Pass identity matrix (maximally mixed state) through the Kraus operators KK = sum([K @ K.conj().T for K in K_list]) # Determine if output is low rank, in which case we need to perform facial # reduction Dkk, Ukk = np.linalg.eigh(KK) KKnzidx = np.where(Dkk > 1e-12)[0] nk_fr = np.size(KKnzidx) if nk == nk_fr: return K_list # Perform facial reduction Qkk = Ukk[:, KKnzidx] K_list_fr = [Qkk.conj().T @ K for K in K_list] return K_list_fr def apply_kraus(x, Klist, adjoint=False): # Compute congruence map if adjoint: return sum([K.conj().T @ x @ K for K in Klist]) else: return sum([K @ x @ K.conj().T for K in Klist])