"""
A collection of classes used to query a storage backend for
data which matches a set of criteria.
"""
import abc
from openff.evaluator.attributes import UNDEFINED, Attribute, AttributeClass
from openff.evaluator.datasets import PropertyPhase
from openff.evaluator.forcefield import ForceFieldSource
from openff.evaluator.storage.attributes import QueryAttribute
from openff.evaluator.storage.data import ForceFieldData, StoredSimulationData
from openff.evaluator.substances import ExactAmount, Substance
from openff.evaluator.thermodynamics import ThermodynamicState
class BaseDataQuery(AttributeClass, abc.ABC):
    """The common base for every query which may be issued against
    a `StorageBackend`.
    """

    @classmethod
    @abc.abstractmethod
    def data_class(cls):
        """Report which kind of stored data object this query
        is able to be applied to.

        Returns
        -------
        type of BaseStoredData
        """
        raise NotImplementedError()

    def apply(self, data_object):
        """Test a data object against this query.

        Only query attributes which are set on the query (i.e. are not
        `UNDEFINED`), which also exist on the data object, and which are
        not flagged as `custom_match` are compared.

        Parameters
        ----------
        data_object: BaseStoredData
            The data object to test.

        Returns
        -------
        tuple of Any, optional
            The data object's values for each compared attribute if every
            one of them equals the value requested by the query, otherwise
            `None`. `None` is also returned when the data object is not an
            instance of `data_class()`. The tuple is empty when no attribute
            was compared.
        """
        expected_type = self.data_class()

        if not isinstance(data_object, expected_type):
            return None

        query_type = self.__class__
        compared_values = []

        for name in self.get_attributes(QueryAttribute):
            descriptor = getattr(query_type, name)

            # An attribute the data object does not carry cannot be compared.
            if not hasattr(data_object, name):
                continue
            # Attributes flagged for custom matching are skipped here.
            if descriptor.custom_match:
                continue

            requested = getattr(self, name)

            # Unset query attributes place no constraint on the data.
            if requested == UNDEFINED:
                continue

            stored = getattr(data_object, name)

            # A mismatch is recorded as `None` and detected below.
            if stored != requested:
                compared_values.append(None)
            else:
                compared_values.append(stored)

        for value in compared_values:
            if value is None:
                return None

        return tuple(compared_values)

    @classmethod
    def from_data_object(cls, data_object):
        """Build the query which a given data object would satisfy.

        Every attribute of the query class which the data object also
        exposes is copied from the data object onto a new query.

        Parameters
        ----------
        data_object: BaseStoredData
            The data object to derive the query from.

        Returns
        -------
        cls
            A query which would match the data object.
        """
        matching_query = cls()

        for name in cls.get_attributes():
            if hasattr(data_object, name):
                setattr(matching_query, name, getattr(data_object, name))

        return matching_query
class SubstanceQuery(AttributeClass, abc.ABC):
    """Describes which part of a substance a query should target,
    e.g. data collected for a substance containing both a solute
    and a solvent, or for the solvent alone.
    """

    components_only = Attribute(
        docstring="Only match pure data which was collected for "
        "one of the components in the query substance.",
        type_hint=bool,
        default_value=False,
    )

    # Disabled attribute, kept for reference:
    #
    # component_roles = QueryAttribute(
    #     docstring="Returns data for only the subset of a substance "
    #     "which has the requested roles.",
    #     type_hint=list,
    #     optional=True,
    # )

    def validate(self, attribute_type=None):
        """Validate the attributes of this query by deferring to the
        parent class implementation.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate` method.
        """
        super().validate(attribute_type)

        # Disabled check which accompanies the disabled `component_roles`
        # attribute above:
        #
        # if (
        #     self.components_only
        #     and self.component_roles != UNDEFINED
        #     and len(self.components_only) > 0
        # ):
        #
        #     raise ValueError(
        #         "The `component_roles` attribute cannot be used when "
        #         "the `components_only` attribute is `True`."
        #     )
class ForceFieldQuery(BaseDataQuery):
    """A query for retrieving `ForceFieldData` objects which satisfy
    the specified criteria from a `StorageBackend`.
    """

    force_field_source = QueryAttribute(
        docstring="The force field source to query for.",
        type_hint=ForceFieldSource,
        optional=True,
    )

    @classmethod
    def data_class(cls):
        """The type of data object this query applies to.

        Returns
        -------
        type
            The `ForceFieldData` class.
        """
        return ForceFieldData
class SimulationDataQuery(BaseDataQuery):
    """A class used to query a `StorageBackend` for
    `StoredSimulationData` which meet the specified set
    of criteria.
    """

    @classmethod
    def data_class(cls):
        """The type of data object this query applies to.

        Returns
        -------
        type
            The `StoredSimulationData` class.
        """
        return StoredSimulationData

    substance = QueryAttribute(
        docstring="The substance which the data should have been collected "
        "for. Data for a subset of this substance can be queried for by "
        "using the `substance_query` attribute",
        type_hint=Substance,
        optional=True,
        custom_match=True,
    )
    substance_query = QueryAttribute(
        docstring="The subset of the `substance` to query for. This option "
        "can only be used when the `substance` attribute is set.",
        type_hint=SubstanceQuery,
        optional=True,
        custom_match=True,
    )

    thermodynamic_state = QueryAttribute(
        docstring="The state at which the data should have been collected.",
        type_hint=ThermodynamicState,
        optional=True,
    )
    property_phase = QueryAttribute(
        docstring="The phase of the substance (e.g. liquid, gas).",
        type_hint=PropertyPhase,
        optional=True,
    )

    source_calculation_id = QueryAttribute(
        docstring="The server id which should have generated this data.",
        type_hint=str,
        optional=True,
    )
    force_field_id = QueryAttribute(
        docstring="The id of the force field parameters which used to "
        "generate the data.",
        type_hint=str,
        optional=True,
    )

    number_of_molecules = QueryAttribute(
        docstring="The total number of molecules in the system.",
        type_hint=int,
        optional=True,
    )

    def _match_substance(self, data_object):
        """Attempt to match the substance (or a subset of it).

        When no `substance_query` is set the data substance must equal the
        query substance. When `substance_query.components_only` is set, the
        data must be for a single component which also appears in the query
        substance with the same type of amount (and the same amount, for
        exact amounts).

        Parameters
        ----------
        data_object: StoredSimulationData
            The data object to match against.

        Returns
        -------
        Substance, optional
            The matched substance if a match is made, otherwise
            `None`.
        """
        if self.substance == UNDEFINED:
            return None

        data_substance: Substance = data_object.substance

        if self.substance_query == UNDEFINED:
            # Without a sub-substance query only an exact match counts.
            return None if self.substance != data_substance else self.substance

        # Handle the sub-substance match.
        if self.substance_query.components_only:

            if data_substance.number_of_components != 1:
                # We are only interested in pure data.
                return None

            data_smiles = data_substance.components[0].smiles

            for component in self.substance.components:

                if component.smiles != data_smiles:
                    continue

                # Make sure the amount type matches up i.e either both
                # are defined in mole fraction, or both as an exact amount.
                # NOTE(review): this assumes the data substance stores its
                # amounts under the same identifier as the query component
                # - confirm against `Substance.get_amounts`.
                data_amount = next(
                    iter(data_substance.get_amounts(component.identifier))
                )
                query_amount = next(
                    iter(self.substance.get_amounts(component.identifier))
                )

                if type(data_amount) is not type(query_amount):
                    continue

                if isinstance(data_amount, ExactAmount) and data_amount != query_amount:
                    # Make sure there is the same amount if we are
                    # dealing with exact amounts.
                    continue

                # A match was found.
                return data_substance

        return None

    def apply(self, data_object, attributes_to_ignore=None):
        """Apply this query to a data object, using the custom substance
        matching in addition to the attribute matching of the base class.

        Parameters
        ----------
        data_object: BaseStoredData
            The data object to apply the query to.
        attributes_to_ignore
            Accepted for interface compatibility; this argument is not
            currently used by this method.

        Returns
        -------
        tuple of Any, optional
            The matched substance (only when the `substance` attribute is
            set) followed by the values matched by the base class, if the
            data object fully matched this query. `None` is returned when
            any criterion fails to match, when no criterion was evaluated,
            or when the data object is not an instance of `data_class()`.
        """
        # Reject objects of the wrong type up front: the substance matching
        # below reads `data_object.substance`, which other stored data types
        # are not guaranteed to provide.
        if not isinstance(data_object, self.data_class()):
            return None

        matches = []

        # Apply a custom match behaviour for the substance
        # attribute.
        if self.substance != UNDEFINED:
            matches.append(self._match_substance(data_object))

        base_matches = super().apply(data_object)
        # Represent a failed base match as a single `None` entry so that it
        # is caught by the check below.
        base_matches = [None] if base_matches is None else base_matches

        matches = [*matches, *base_matches]

        if not matches or any(x is None for x in matches):
            return None

        return tuple(matches)

    def validate(self, attribute_type=None):
        """Validate the attributes of this query.

        Parameters
        ----------
        attribute_type
            Passed through unchanged to the parent `validate` method.

        Raises
        ------
        ValueError
            If `substance_query` is set while `substance` is not.
        """
        super().validate(attribute_type)

        if self.substance_query != UNDEFINED and self.substance == UNDEFINED:

            raise ValueError(
                "The `substance_query` can only be used when the "
                "`substance` attribute is set."
            )