Source code for openff.evaluator.storage.query

"""
A collection of classes used to query a storage backend for
data which matches a set of criteria.
"""
import abc

from openff.evaluator.attributes import UNDEFINED, Attribute, AttributeClass
from openff.evaluator.datasets import PropertyPhase
from openff.evaluator.forcefield import ForceFieldSource
from openff.evaluator.storage.attributes import QueryAttribute
from openff.evaluator.storage.data import ForceFieldData, StoredSimulationData
from openff.evaluator.substances import ExactAmount, Substance
from openff.evaluator.thermodynamics import ThermodynamicState


class BaseDataQuery(AttributeClass, abc.ABC):
    """The common base of all queries which can be made to a
    `StorageBackend`.
    """

    @classmethod
    @abc.abstractmethod
    def data_class(cls):
        """The type of data class that this query can be applied to.

        Returns
        -------
        type of BaseStoredData
        """
        raise NotImplementedError()

    def apply(self, data_object):
        """Apply this query to a data object.

        Only the `QueryAttribute` attributes of this query are considered.
        An attribute is skipped when the data object has no attribute of
        the same name, when the attribute is flagged as ``custom_match``,
        or when no value has been set for it on this query.

        Parameters
        ----------
        data_object: BaseStoredData
            The data object to apply the query to.

        Returns
        -------
        tuple of Any, optional
            The values of the matched attributes if the data object fully
            matched this query, otherwise `None`.
        """
        expected_type = self.data_class()

        if not isinstance(data_object, expected_type):
            return None

        query_type = self.__class__
        matched_values = []

        for name in self.get_attributes(QueryAttribute):

            descriptor = getattr(query_type, name)

            if not hasattr(data_object, name):
                continue
            if descriptor.custom_match:
                # Attributes with custom matching are left to subclasses.
                continue

            expected_value = getattr(self, name)

            if expected_value == UNDEFINED:
                continue

            actual_value = getattr(data_object, name)

            # A mismatch is recorded as `None` so that every attribute is
            # still inspected before the overall result is decided.
            if actual_value != expected_value:
                matched_values.append(None)
            else:
                matched_values.append(actual_value)

        for value in matched_values:

            if value is None:
                return None

        return tuple(matched_values)

    @classmethod
    def from_data_object(cls, data_object):
        """Returns the query which would match this data object.

        Every attribute of this class which the data object also exposes
        is copied from the data object onto a newly created query.

        Parameters
        ----------
        data_object: BaseStoredData
            The data object to construct the query for.

        Returns
        -------
        cls
            The query which would match this data object.
        """
        query = cls()

        for name in cls.get_attributes():

            if hasattr(data_object, name):
                setattr(query, name, getattr(data_object, name))

        return query
class SubstanceQuery(AttributeClass, abc.ABC):
    """A query which focuses on finding data which was collected for
    substances with specific traits, e.g. which contains both a solute
    and solvent, or only a solvent etc.
    """

    components_only = Attribute(
        docstring="Only match pure data which was collected for "
        "one of the components in the query substance.",
        type_hint=bool,
        default_value=False,
    )

    # Currently disabled:
    #
    # component_roles = QueryAttribute(
    #     docstring="Returns data for only the subset of a substance "
    #     "which has the requested roles.",
    #     type_hint=list,
    #     optional=True,
    # )

    def validate(self, attribute_type=None):
        """Validate the attributes of this query by delegating to the
        parent class.

        Parameters
        ----------
        attribute_type: optional
            Passed through unchanged to the parent `validate` method.
        """
        super().validate(attribute_type)

        # Currently disabled, together with `component_roles`:
        #
        # if (
        #     self.components_only
        #     and self.component_roles != UNDEFINED
        #     and len(self.components_only) > 0
        # ):
        #
        #     raise ValueError(
        #         "The `component_roles` attribute cannot be used when "
        #         "the `components_only` attribute is `True`."
        #     )
class ForceFieldQuery(BaseDataQuery):
    """A query for retrieving `ForceFieldData` which meets the specified
    criteria from a `StorageBackend`.
    """

    force_field_source = QueryAttribute(
        docstring="The force field source to query for.",
        type_hint=ForceFieldSource,
        optional=True,
    )

    @classmethod
    def data_class(cls):
        """The type of data class that this query can be applied to.

        Returns
        -------
        type of ForceFieldData
        """
        return ForceFieldData
class SimulationDataQuery(BaseDataQuery):
    """A class used to query a `StorageBackend` for `StoredSimulationData`
    which meet the specified set of criteria.
    """

    @classmethod
    def data_class(cls):
        """The type of data class that this query can be applied to.

        Returns
        -------
        type of StoredSimulationData
        """
        return StoredSimulationData

    substance = QueryAttribute(
        docstring="The substance which the data should have been collected "
        "for. Data for a subset of this substance can be queried for by "
        "using the `substance_query` attribute",
        type_hint=Substance,
        optional=True,
        custom_match=True,
    )
    substance_query = QueryAttribute(
        docstring="The subset of the `substance` to query for. This option "
        "can only be used when the `substance` attribute is set.",
        type_hint=SubstanceQuery,
        optional=True,
        custom_match=True,
    )

    thermodynamic_state = QueryAttribute(
        docstring="The state at which the data should have been collected.",
        type_hint=ThermodynamicState,
        optional=True,
    )
    property_phase = QueryAttribute(
        docstring="The phase of the substance (e.g. liquid, gas).",
        type_hint=PropertyPhase,
        optional=True,
    )

    source_calculation_id = QueryAttribute(
        docstring="The server id which should have generated this data.",
        type_hint=str,
        optional=True,
    )
    force_field_id = QueryAttribute(
        docstring="The id of the force field parameters which used to "
        "generate the data.",
        type_hint=str,
        optional=True,
    )

    number_of_molecules = QueryAttribute(
        docstring="The total number of molecules in the system.",
        type_hint=int,
        optional=True,
    )

    def _match_substance(self, data_object):
        """Attempt to match the substance (or a subset of it).

        When no `substance_query` is set the substances must compare
        equal. When `substance_query.components_only` is set, only
        single component data is accepted, and that component must have
        the same SMILES as one of the components of the query substance,
        with an amount of the same type (and an equal amount for exact
        amounts). Any other `substance_query` never matches.

        Parameters
        ----------
        data_object: StoredSimulationData
            The data object to match against.

        Returns
        -------
        Substance, optional
            The matched substance if a match is made, otherwise `None`.
        """
        if self.substance == UNDEFINED:
            return None

        data_substance: Substance = data_object.substance

        if self.substance_query == UNDEFINED:
            # Without a sub-query the full substances must be identical.
            if self.substance != data_substance:
                return None

            return self.substance

        # Handle the sub-substance match.
        if not self.substance_query.components_only:
            return None

        if data_substance.number_of_components != 1:
            # We are only interested in pure data.
            return None

        for component in self.substance.components:

            if component.smiles != data_substance.components[0].smiles:
                continue

            # Make sure the amount type matches up i.e. either both
            # are defined in mole fraction, or both as an exact amount.
            data_amount = next(iter(data_substance.get_amounts(component.identifier)))
            query_amount = next(iter(self.substance.get_amounts(component.identifier)))

            if type(data_amount) is not type(query_amount):
                continue

            if isinstance(data_amount, ExactAmount) and data_amount != query_amount:
                # Make sure there is the same amount if we are
                # dealing with exact amounts.
                continue

            # A match was found.
            return data_substance

        return None

    def apply(self, data_object, attributes_to_ignore=None):
        """Apply this query to a data object.

        The `substance` attribute is matched with custom logic, and all
        remaining attributes are matched by the parent class.

        Parameters
        ----------
        data_object: BaseStoredData
            The data object to apply the query to.
        attributes_to_ignore: optional
            Not used by this implementation; retained so that existing
            callers which pass it continue to work.

        Returns
        -------
        tuple of Any, optional
            The matched substance (when `substance` is set) followed by
            the values matched by the parent class, or `None` if any
            criterion failed to match, if no criteria were set, or if the
            data object is not of the type returned by `data_class`.
        """
        # Reject objects of the wrong type up front. The substance match
        # below reads `data_object.substance`, which objects of other data
        # classes are not guaranteed to have.
        if not isinstance(data_object, self.data_class()):
            return None

        matches = []

        # Apply a custom match behaviour for the substance
        # attribute.
        if self.substance != UNDEFINED:
            matches.append(self._match_substance(data_object))

        base_matches = super().apply(data_object)

        if base_matches is None:
            # Propagate the failed parent match as a failed entry.
            base_matches = [None]

        matches = [*matches, *base_matches]

        if len(matches) == 0 or any(x is None for x in matches):
            return None

        return tuple(matches)

    def validate(self, attribute_type=None):
        """Validate the attributes of this query.

        Parameters
        ----------
        attribute_type: optional
            Passed through unchanged to the parent `validate` method.

        Raises
        ------
        ValueError
            If `substance_query` is set while `substance` is not.
        """
        super().validate(attribute_type)

        if self.substance_query != UNDEFINED and self.substance == UNDEFINED:

            raise ValueError(
                "The `substance_query` can only be used when the "
                "`substance` attribute is set."
            )