# Source code for strawberryfields.apps.data.sample

# Copyright 2019-2020 Xanadu Quantum Technologies Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""
Submodule for sample datasets and their base classes.
"""
# pylint: disable=unnecessary-pass
from abc import ABC, abstractmethod

import pkg_resources
import numpy as np
import scipy

# Location of the ``apps/data/sample_data`` resource of the ``strawberryfields`` package, as
# reported by ``pkg_resources``. The trailing "/" lets the dataset classes in this module build
# file paths by plain string concatenation (``DATA_PATH + name + ".npz"``).
DATA_PATH = pkg_resources.resource_filename("strawberryfields", "apps/data/sample_data") + "/"


class SampleDataset(ABC):
    """Base class for loading datasets of pre-generated samples.

    Instances behave like read-only sequences of samples: they support ``len()``, integer
    indexing (including negative indices), slicing, and iteration. Each sample is returned as
    a list with one entry per mode.

    Attributes:
        n_mean (float): mean number of photons in the GBS device
        threshold (bool): flag to indicate whether samples are generated with threshold detection
            (i.e., detectors of zero or some photons) or with photon-number-resolving detectors.
        n_samples (int): total number of samples in the dataset
        modes (int): number of modes in the GBS device or, equivalently, number of nodes in graph
        data (sparse): raw data of samples from GBS as a `csr sparse array
            <https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html>`__.
    """

    # Position of the next sample handed out by ``__next__``. This is only the class-level
    # default: the first ``self._count += 1`` creates a per-instance counter.
    _count = 0

    @property
    @abstractmethod
    def _data_filename(self) -> str:
        """Base name of files containing the sample data stored in the ``./sample_data/`` directory.

        Samples and corresponding adjacency matrix should both be provided as a
        ``scipy.sparse.csr_matrix`` saved in ``.npz`` format.

        For ``_data_filename = "example"``, the corresponding samples should be stored as
        ``./sample_data/example.npz`` and the adjacency matrix as ``./sample_data/example_A.npz``.
        """
        pass

    def __init__(self):
        self.data = scipy.sparse.load_npz(DATA_PATH + self._data_filename + ".npz")
        # One row per sample, one column per mode.
        self.n_samples, self.modes = self.data.shape

    def __iter__(self):
        # Rewind before every new iteration. Without this, a loop that was left early
        # (``break``) or preceded by manual ``next()`` calls would make the following loop
        # resume part-way through the dataset instead of starting at the first sample.
        self._count = 0
        return self

    def __next__(self):
        if self._count >= self.n_samples:
            # Reset so that the dataset can be iterated over again after exhaustion.
            self._count = 0
            raise StopIteration
        self._count += 1
        return self[self._count - 1]

    def _elem(self, i):
        """Access the i-th element of the sparse array and output as a list."""
        return list(self.data[i].toarray()[0])

    def __getitem__(self, key):
        """Return a single sample or a list of samples.

        Args:
            key (int, slice or tuple): an integer index (negative values count from the end),
                a slice, or a tuple of ``slice`` arguments ``(stop,)``, ``(start, stop)`` or
                ``(start, stop, step)``

        Returns:
            list: the selected sample for an integer ``key``, otherwise a list of samples

        Raises:
            TypeError: if ``key`` is not an integer, slice, or tuple
            IndexError: if an integer ``key`` lies outside ``[-n_samples, n_samples)``
        """
        # NumPy integers are accepted alongside built-in ``int``.
        if isinstance(key, (int, np.integer)):
            index = int(key)
            if index < 0:
                index += self.n_samples
            # Check the range explicitly: an index that is still negative after the shift
            # would otherwise be wrapped around by the sparse matrix and silently return
            # the wrong sample.
            if not 0 <= index < self.n_samples:
                raise IndexError("Dataset index out of range")
            return self._elem(index)

        if isinstance(key, tuple):
            key = slice(*key)

        if not isinstance(key, slice):
            raise TypeError("Dataset indices must be integers, slices, or tuples")

        # ``slice.indices`` clips start/stop/step to the size of the dataset.
        return [self._elem(i) for i in range(*key.indices(self.n_samples))]

    def __len__(self):
        return self.n_samples

    def counts(self, axis: int = 1) -> list:
        """Count number of photons or clicks.

        Counts number of photons/clicks in each sample (``axis==1``) or number of photons/clicks
        in each mode compounded over all samples (``axis==0``).

        Args:
            axis (int): axis to perform count

        Returns:
            list: counts from samples
        """
        return np.array(self.data.sum(axis)).flatten().tolist()

    @property
    @abstractmethod
    def n_mean(self) -> float:
        """Mean number of photons in the GBS device; must be provided by subclasses."""
        pass

    @property
    @abstractmethod
    def threshold(self) -> bool:
        """Whether samples come from threshold detection; must be provided by subclasses."""
        pass


# pylint: disable=abstract-method
class GraphDataset(SampleDataset, ABC):
    """Class for loading datasets of pre-generated samples from graphs.

    In addition to the samples loaded by :class:`SampleDataset`, the adjacency matrix of the
    graph is read from ``<_data_filename>_A.npz`` in the sample data directory.

    Attributes:
        adj (array): adjacency matrix of the graph from which samples were generated
    """

    def __init__(self):
        super().__init__()
        adjacency_file = DATA_PATH + self._data_filename + "_A.npz"
        # Stored on disk as a sparse matrix; exposed to users as a dense array.
        self.adj = scipy.sparse.load_npz(adjacency_file).toarray()


class Planted(GraphDataset):
    """A random 30-node graph containing a dense 10-node subgraph planted inside
    :cite:`arrazola2018using`.

    The graph is generated by joining two Erdős–Rényi random graphs. The first 20-node graph is
    generated with edge probability of 0.5 and the second 10-node planted graph is generated with
    edge probability of 0.875. The two graphs are joined by selecting 8 vertices at random from
    both and adding an edge between them.

    The 10-node planted subgraph is contained within the final 10 nodes of the graph.

    **Graph:**

    .. |planted| image:: ../../../_static/graphs/planted.png
        :align: middle
        :width: 250px
        :target: javascript:void(0);

    |planted|

    Attributes:
        n_mean = 8
        threshold = True
        n_samples = 50000
        modes = 30
    """

    # Base name of the data files: ``planted.npz`` (samples) and ``planted_A.npz`` (adjacency).
    _data_filename = "planted"
    n_mean = 8
    threshold = True


class TaceAs(GraphDataset):
    """Binding interaction graph for the TACE-AS complex :cite:`banchi2019molecular`.

    Nodes in this graph correspond to pairs of atoms in a target protein and a pharmaceutical
    molecule. Edges in the graph are added if the distance between both pairs of atoms is very
    close to equal. Cliques in the graph correspond to possible docking configurations of protein
    and molecule, and the largest clique is the most stable configuration. There are multiple
    maximum-sized cliques of 8 nodes in this graph.

    **Graph:**

    .. |tace_as| image:: ../../../_static/graphs/TACE-AS.png
        :align: middle
        :width: 250px
        :target: javascript:void(0);

    |tace_as|

    Attributes:
        n_mean = 8
        threshold = True
        n_samples = 50000
        modes = 24
    """

    # Base name of the data files: ``TACE-AS.npz`` (samples) and ``TACE-AS_A.npz`` (adjacency).
    _data_filename = "TACE-AS"
    n_mean = 8
    threshold = True


class PHat(GraphDataset):
    """Random graph created using the p-hat generator of :cite:`gendreau1993solving`.

    This graph is the ``p_hat300-1`` graph of the `DIMACS
    <http://iridia.ulb.ac.be/~fmascia/maximum_clique/DIMACS-benchmark>`__ dataset, which is a
    collection of large graphs with cliques that are hard to find. The best known clique of
    this 300-node graph is of size 8 and is composed of nodes: ``[53, 123, 180, 218, 246, 267, 270,
    286]``. This graph is not visualized due to its large size.

    Attributes:
        n_mean = 10
        threshold = True
        n_samples = 50000
        modes = 300
    """

    # Base name of the data files: ``p_hat300-1.npz`` (samples) and ``p_hat300-1_A.npz``
    # (adjacency).
    _data_filename = "p_hat300-1"
    n_mean = 10
    threshold = True


class Mutag0(GraphDataset):
    """First graph of the MUTAG dataset.

    The MUTAG dataset is from :cite:`debnath1991structure,kriege2012subgraph` and is available
    `here <https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets>`__.

    **Graph:**

    .. |mutag_0| image:: ../../../_static/graphs/MUTAG_0.png
        :align: middle
        :width: 250px
        :target: javascript:void(0);

    |mutag_0|

    Attributes:
        n_mean = 6
        threshold = False
        n_samples = 20000
        modes = 17
    """

    # Base name of the data files: ``MUTAG_0.npz`` (samples) and ``MUTAG_0_A.npz`` (adjacency).
    _data_filename = "MUTAG_0"
    n_mean = 6
    threshold = False


class Mutag1(GraphDataset):
    """Second graph of the MUTAG dataset.

    The MUTAG dataset is from :cite:`debnath1991structure,kriege2012subgraph` and is available
    `here <https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets>`__.

    **Graph:**

    .. |mutag_1| image:: ../../../_static/graphs/MUTAG_1.png
        :align: middle
        :width: 250px
        :target: javascript:void(0);

    |mutag_1|

    Attributes:
        n_mean = 6
        threshold = False
        n_samples = 20000
        modes = 13
    """

    # Base name of the data files: ``MUTAG_1.npz`` (samples) and ``MUTAG_1_A.npz`` (adjacency).
    _data_filename = "MUTAG_1"
    n_mean = 6
    threshold = False


class Mutag2(GraphDataset):
    """Third graph of the MUTAG dataset.

    The MUTAG dataset is from :cite:`debnath1991structure,kriege2012subgraph` and is available
    `here <https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets>`__.

    **Graph:**

    .. |mutag_2| image:: ../../../_static/graphs/MUTAG_2.png
        :align: middle
        :width: 250px
        :target: javascript:void(0);

    |mutag_2|

    Attributes:
        n_mean = 6
        threshold = False
        n_samples = 20000
        modes = 13
    """

    # Base name of the data files: ``MUTAG_2.npz`` (samples) and ``MUTAG_2_A.npz`` (adjacency).
    _data_filename = "MUTAG_2"
    n_mean = 6
    threshold = False


class Mutag3(GraphDataset):
    """Fourth graph of the MUTAG dataset.

    The MUTAG dataset is from :cite:`debnath1991structure,kriege2012subgraph` and is available
    `here <https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets>`__.

    **Graph:**

    .. |mutag_3| image:: ../../../_static/graphs/MUTAG_3.png
        :align: middle
        :width: 250px
        :target: javascript:void(0);

    |mutag_3|

    Attributes:
        n_mean = 6
        threshold = False
        n_samples = 20000
        modes = 19
    """

    # Base name of the data files: ``MUTAG_3.npz`` (samples) and ``MUTAG_3_A.npz`` (adjacency).
    _data_filename = "MUTAG_3"
    n_mean = 6
    threshold = False


# pylint: disable=abstract-method
class MoleculeDataset(SampleDataset, ABC):
    r"""Class for loading datasets of pre-generated samples from molecules.

    In addition to the samples loaded by :class:`SampleDataset`, the molecular parameters are
    read from the files ``<_data_filename>_w.npz``, ``<_data_filename>_wp.npz``,
    ``<_data_filename>_Ud.npz`` and ``<_data_filename>_delta.npz`` in the sample data directory.

    Attributes:
        w (array): normal mode frequencies of the electronic ground state (:math:`\mbox{cm}^{-1}`)
        wp (array): normal mode frequencies of the electronic excited state (:math:`\mbox{cm}^{-1}`)
        Ud (array): Duschinsky matrix
        delta (array): Displacement vector, with entries :math:`\delta_i=\sqrt{\omega'_i/\hbar}d_i`,
           and :math:`d_i` is the Duschinsky displacement
        T (float): temperature (Kelvin)
    """

    def __init__(self):
        super().__init__()
        prefix = DATA_PATH + self._data_filename

        # Vector-valued parameters are stored as single-row sparse matrices, hence the ``[0]``
        # after densifying; the Duschinsky matrix is kept two-dimensional.
        self.w = scipy.sparse.load_npz(prefix + "_w.npz").toarray()[0]
        self.wp = scipy.sparse.load_npz(prefix + "_wp.npz").toarray()[0]
        self.Ud = scipy.sparse.load_npz(prefix + "_Ud.npz").toarray()
        self.delta = scipy.sparse.load_npz(prefix + "_delta.npz").toarray()[0]

    @property
    @abstractmethod
    def T(self) -> float:
        """Temperature in Kelvin; must be provided by subclasses."""
        pass


class Formic(MoleculeDataset):
    """Zero temperature formic acid.

    The molecular parameters are obtained from Ref. :cite:`huh2015boson`.

    **Molecule:**

    .. |formic| image:: ../../../_static/formic.png
        :align: middle
        :width: 250px
        :target: javascript:void(0);

    |formic|

    Attributes:
        n_mean = 1.56
        threshold = False
        n_samples = 20000
        modes = 14
        T = 0
    """

    # Base name of the data files: ``formic.npz`` (samples) plus the ``formic_w``,
    # ``formic_wp``, ``formic_Ud`` and ``formic_delta`` parameter files.
    _data_filename = "formic"
    n_mean = 1.56
    threshold = False
    T = 0


class Water(SampleDataset):
    """Vibrational dynamics of the water molecule.

    The molecular parameters are obtained from Ref. :cite:`sparrow2018simulating`.

    **Molecule:**

    .. |water| image:: ../../../_static/water.png
        :align: middle
        :width: 200px
        :target: javascript:void(0);

    |water|

    Args:
        t (float): time of evolution in femtoseconds

    Raises:
        ValueError: if ``t`` is not one of the available times

    Attributes:
        n_mean = 1/3
        threshold = False
        n_samples = 135000, 5000 samples for each of 27 available times
        modes = 3
    """

    _data_filename = "water"
    n_mean = 1 / 3
    threshold = False

    # Maps every available evolution time (0, 10, ..., 260) to its position along the first
    # axis of the array stored in the samples file.
    _times_to_indices = {t: i for i, t in enumerate(np.linspace(0, 260, 27))}

    available_times = list(_times_to_indices)

    # pylint: disable=super-init-not-called
    def __init__(self, t):
        if t not in self._times_to_indices:
            raise ValueError(
                "The selected time is not correct. Available times are 0, 10, 20, ..., 260"
            )
        index = self._times_to_indices[t]

        prefix = DATA_PATH + self._data_filename

        # The samples for all times live in one plain NumPy archive. ``np.load`` keeps the
        # ``.npz`` file open until it is closed, so use it as a context manager to release
        # the file handle as soon as the requested slice has been read.
        with np.load(prefix + ".npz") as archive:
            samples = archive["arr_0"][index]

        # Keep the same sparse representation as datasets loaded by ``SampleDataset.__init__``.
        self.data = scipy.sparse.csr_matrix(samples)
        self.n_samples, self.modes = self.data.shape

        # NOTE(review): ``w`` is presumably the vector of vibrational frequencies and ``U`` the
        # associated transformation matrix -- confirm against the data files.
        self.w = scipy.sparse.load_npz(prefix + "_w.npz").toarray()[0]
        self.U = scipy.sparse.load_npz(prefix + "_U.npz").toarray()


# pylint: disable=too-many-instance-attributes
class Pyrrole(SampleDataset):
    """Vibrational dynamics of the `pyrrole <https://en.wikipedia.org/wiki/Pyrrole>`__ molecule.

    The molecular parameters are obtained from Ref. :cite:`jahangiri2020quantum`.

    **Molecule:**

    .. |pyrrole| image:: ../../../_static/pyrrole.png
        :align: middle
        :width: 350px
        :target: javascript:void(0);

    |pyrrole|

    Args:
        t (float): time of evolution in femtoseconds

    Raises:
        ValueError: if ``t`` is not one of the available times

    Attributes:
        n_mean = 0.12599583
        threshold = False
        n_samples = 10000, 1000 samples for each of 10 available times
        modes = 24
    """

    # pylint: disable=too-many-instance-attributes
    _data_filename = "pyrrole"
    n_mean = 0.12599583
    threshold = False

    # Maps every available evolution time (0, 100, ..., 900) to its position along the first
    # axis of the array stored in the samples file.
    _times_to_indices = {t: i for i, t in enumerate(np.linspace(0, 900, 10))}

    available_times = list(_times_to_indices)

    # pylint: disable=super-init-not-called
    def __init__(self, t):
        if t not in self._times_to_indices:
            raise ValueError(
                "The selected time is not correct. Available times are 0, 100, 200, ..., 900"
            )
        index = self._times_to_indices[t]

        prefix = DATA_PATH + self._data_filename

        # The samples for all times live in one plain NumPy archive. ``np.load`` keeps the
        # ``.npz`` file open until it is closed, so use it as a context manager to release
        # the file handle as soon as the requested slice has been read.
        with np.load(prefix + ".npz") as archive:
            samples = archive["arr_0"][index]

        # Keep the same sparse representation as datasets loaded by ``SampleDataset.__init__``.
        self.data = scipy.sparse.csr_matrix(samples)
        self.n_samples, self.modes = self.data.shape

        # Molecular parameters. Vector-valued quantities are stored as single-row sparse
        # matrices (hence the ``[0]``); ``Li``, ``Lf`` and ``U`` are kept two-dimensional.
        # NOTE(review): the ``i``/``f`` suffixes presumably denote initial/final states --
        # confirm against Ref. jahangiri2020quantum.
        self.ri = scipy.sparse.load_npz(prefix + "_ri.npz").toarray()[0]
        self.rf = scipy.sparse.load_npz(prefix + "_rf.npz").toarray()[0]
        self.wi = scipy.sparse.load_npz(prefix + "_wi.npz").toarray()[0]
        self.wf = scipy.sparse.load_npz(prefix + "_wf.npz").toarray()[0]
        self.Li = scipy.sparse.load_npz(prefix + "_Li.npz").toarray()
        self.Lf = scipy.sparse.load_npz(prefix + "_Lf.npz").toarray()
        self.m = scipy.sparse.load_npz(prefix + "_m.npz").toarray()[0]
        self.U = scipy.sparse.load_npz(prefix + "_U.npz").toarray()
# _modules/strawberryfields/apps/data/sample
# Download Python script
# Download Notebook
# View on GitHub
# Source code for strawberryfields.apps.data.sample
#
# Contents