# Source code for strawberryfields.apps.train.cost

# Copyright 2020 Xanadu Quantum Technologies Inc.

# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# Unless required by applicable law or agreed to in writing, software
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
r"""
Submodule for computing gradients and evaluating cost functions with respect to GBS circuits
"""
from typing import Callable

import numpy as np

from strawberryfields.apps.train.param import VGBS, _Omat

[docs]class KL:
r"""Kullback-Liebler divergence cost function.

In a standard unsupervised learning scenario, data are assumed to be sampled from an unknown
distribution and a common goal is to learn that distribution. Training of a model
distribution can be performed by minimizing the Kullback-Leibler (KL) divergence, which up to
additive constants can be written as:

.. math::

KL = -\frac{1}{T}\sum_S \log[P(S)],

where :math:S is an element of the data, :math:P(S) is the probability of observing that
element when sampling from the GBS distribution, and :math:T is the total number of elements
in the data. For the GBS distribution in the WAW parametrization, the gradient of the KL
divergence can be written as

.. math::

\partial_\theta KL(\theta) = - \sum_{k=1}^m\frac{1}{w_k}(\langle n_k\rangle_{\text{data}}-
\langle n_k\rangle_{\text{GBS}})\partial_\theta w_k,

where :math:\langle n_k\rangle denotes the average photon numbers in mode *k*. This class
provides methods to compute gradients and evaluate the cost function.

**Example usage**

>>> embedding = train.embed.Exp(4)
>>> A = np.ones((4, 4))
>>> vgbs = train.VGBS(A, 3, embedding, threshold=True)
>>> params = np.array([0.05, 0.1, 0.02, 0.01])
>>> data = np.zeros((4, 4))
>>> kl = cost.KL(data, vgbs)
>>> kl.evaluate(params)
-0.2866830267216749
array([-0.52812574, -0.5201932 , -0.53282312, -0.53437824])

Args:
data (array): Array of samples representing the training data
vgbs (train.VGBS): Variational GBS class

"""

def __init__(self, data: np.ndarray, vgbs: VGBS):
self.data = data
self.vgbs = vgbs
self.nr_samples, self.nr_modes = np.shape(self.data)
self.mean_n_data = np.mean(self.data, axis=0)

[docs]    def grad(self, params: np.ndarray) -> np.ndarray:
r"""Calculates the gradient of the Kullback-Liebler cost function with respect to the
trainable parameters

**Example usage**

array([-0.52812574, -0.5201932 , -0.53282312, -0.53437824])

Args:
params (array[float]): the trainable parameters :math:\theta
Returns:
array: the gradient of the KL cost function with respect to :math:\theta
"""
weights = self.vgbs.embedding(params)
if self.vgbs.threshold:
n_diff = self.vgbs.mean_clicks_by_mode(params) - self.mean_n_data
else:
n_diff = self.vgbs.mean_photons_by_mode(params) - self.mean_n_data
return (n_diff / weights) @ self.vgbs.embedding.jacobian(params)

[docs]    def evaluate(self, params: np.ndarray) -> float:
r"""Computes the value of the Kullback-Liebler divergence cost function.

**Example usage**

>>> kl.evaluate(params)
-0.2866830267216749

Args:
params (array): the trainable parameters :math:\theta
Returns:
float: the value of the cost function
"""
kl = 0
for sample in self.data:
kl += np.log(self.vgbs.prob_sample(params, sample))
return -kl / self.nr_samples

def __call__(self, params: np.ndarray) -> float:
return self.evaluate(params)

[docs]class Stochastic:
r"""Stochastic cost function given by averaging over samples from a trainable GBS distribution.

A stochastic optimization problem is defined with respect to a function :math:h(\bar{n}) that
assigns a cost to an input sample :math:\bar{n}. The cost function is the
average of :math:h(\bar{n}) over samples generated from a parametrized distribution
:math:P_{\theta}(\bar{n}):

.. math::

C (\theta) = \sum_{\bar{n}} h(\bar{n}) P_{\theta}(\bar{n})

The cost function :math:C (\theta) can then be optimized by varying
:math:P_{\theta}(\bar{n}).

In this setting, :math:P_{\theta}(\bar{n}) is the variational GBS distribution and is
specified in :class:~.Stochastic by an instance of :class:~.train.VGBS.

**Example usage:**

The function :math:h(\bar{n}) can be viewed as an energy. Clicks in odd-numbered modes
decrease the total energy, while clicks in even-numbered modes increase it.

>>> embedding = train.embed.Exp(4)
>>> A = np.ones((4, 4))
>>> vgbs = train.VGBS(A, 3, embedding, threshold=True)
>>> h = lambda x: sum([x[i] * (-1) ** (i + 1) for i in range(4)])
>>> cost = Stochastic(h, vgbs)
>>> params = np.array([0.05, 0.1, 0.02, 0.01])
>>> cost.evaluate(params, 100)
0.03005489236683591
array([ 0.10880756, -0.1247146 ,  0.12426481, -0.13783342])

Args:
h (callable): a function that assigns a cost to an input sample
vgbs (train.VGBS): the trainable GBS distribution, which must be an instance of
:class:~.train.VGBS
"""

def __init__(self, h: Callable, vgbs: VGBS):
self.h = h
self.vgbs = vgbs

[docs]    def evaluate(self, params: np.ndarray, n_samples: int) -> float:
r"""Evaluates the cost function.

The cost function can be evaluated by finding its average over samples generated from the
VGBS system using the trainable parameters :math:\theta:

.. math::

C (\theta) = \sum_{\bar{n}} h(\bar{n}) P_{\theta}(\bar{n})

Alternatively, the cost function can be evaluated by finding a different average over
samples from the input adjacency matrix to the VGBS system:

.. math::

C (\theta) = \sum_{\bar{n}} h(\bar{n}, \theta) P(\bar{n})

where :math:h(\bar{n}, \theta) is given in :meth:~.Stochastic.h_reparametrized and now
contains the trainable parameters, and :math:P(\bar{n}) is the distribution over the
input adjacency matrix. The advantage of this alternative approach is that we do not
need to keep regenerating samples for an updated adjacency matrix and can instead use
a fixed set of samples.

The second approach above is utilized in :class:Stochastic to speed up evaluation of
the cost function and its gradient. This is done by approximating the cost function using a
single fixed set of samples. The samples can be pre-loaded into the :class:~.train.VGBS class or
generated once upon the first call of either :meth:Stochastic.evaluate or
:meth:Stochastic.grad.

**Example usage:**

>>> cost.evaluate(params, 100)
0.03005489236683591

Args:
params (array): the trainable parameters :math:\theta
n_samples (int): the number of GBS samples used to average the cost function

Returns:
float: the value of the stochastic cost function
"""
samples = self.vgbs.get_A_init_samples(n_samples)
return np.mean([self.h_reparametrized(s, params) for s in samples])

[docs]    def h_reparametrized(self, sample: np.ndarray, params: np.ndarray) -> float:
r"""Include trainable parameters in the :math:h(\bar{n}) function to allow sampling

The reparametrized function can be written in terms :math:h(\bar{n}) as:

.. math::

h(\bar{n}, \theta) = h(\bar{n}) \sqrt{\frac{\det (\mathbb{I} - A(\theta)^{2})}
{\det (\mathbb{I} - A^{2})}} \prod_{k=1}^{m}w_{k}^{n_{k}},

where :math:w_{k} is the :math:\theta-dependent weight on the :math:k-th mode in
the :class:~.train.VGBS system and :math:n_{k} is the number of photons in mode :math:k.

**Example usage:**

>>> sample = [1, 1, 0, 0]
>>> cost.h_reparametrized(sample, params)
-1.6688383062813434

Args:
sample (array): the sample
params (array): the trainable parameters :math:\theta

Returns:
float: the cost function with respect to a given sample and set of trainable parameters
"""
h = self.h(sample)
A = self.vgbs.A(params)
w = self.vgbs.embedding(params)
Id = np.eye(2 * self.vgbs.n_modes)

dets_numerator = np.linalg.det(Id - _Omat(A))
dets_denominator = np.linalg.det(Id - _Omat(self.vgbs.A_init))
dets = np.sqrt(dets_numerator / dets_denominator)

prod = np.prod(np.power(w, sample))

return h * dets * prod

def _gradient_one_sample(self, sample: np.ndarray, params: np.ndarray) -> np.ndarray:
"""Evaluates the gradient equation on a single sample.

Args:
sample (array): the sample
params (array): the trainable parameters :math:\theta

Returns:
"""
w = self.vgbs.embedding(params)
jac = self.vgbs.embedding.jacobian(params)

h = self.h_reparametrized(sample, params)

if self.vgbs.threshold:
diff = sample - self.vgbs.mean_clicks_by_mode(params)
else:
diff = sample - self.vgbs.mean_photons_by_mode(params)

return h * (diff / w) @ jac

[docs]    def grad(self, params: np.ndarray, n_samples: int) -> np.ndarray:
r"""Evaluates the gradient of the cost function.

As shown in this paper <https://arxiv.org/abs/2004.04770>__, the gradient can be
evaluated by finding an average over samples generated from the input adjacency matrix to
the VGBS system:

.. math::

\partial_{\theta} C (\theta) = \sum_{\bar{n}} h(\bar{n}, \theta) P(\bar{n})
\sum_{k=1}^{m}  (n_k - \langle n_{k} \rangle) \partial_{\theta} \log w_{k}

where :math:h(\bar{n}, \theta) is given in :meth:~.Stochastic.h_reparametrized,
:math:P(\bar{n}) is the distribution over the input adjacency matrix, :math:n_{k} is
the number of photons in mode :math:k, and :math:w_{k} are the weights in the
:class:~.train.VGBS system.

This method approximates the gradient using a fixed set of samples from the initial
adjacency matrix. The samples can be pre-loaded into the :class:~.train.VGBS class or
generated once upon the first call of :meth:Stochastic.evaluate or
:meth:Stochastic.grad.

**Example usage:**

array([ 0.10880756, -0.1247146 ,  0.12426481, -0.13783342])

Args:
params (array): the trainable parameters :math:\theta
n_samples (int): the number of GBS samples used in the gradient estimation

Returns:
"""
samples = self.vgbs.get_A_init_samples(n_samples)
return np.mean([self._gradient_one_sample(s, params) for s in samples], axis=0)

def __call__(self, params: np.ndarray, n_samples: int) -> float:
return self.evaluate(params, n_samples)


### Contents

Using Strawberry Fields

Development

API