"""
A collection of protocols for running analysing the results of molecular simulations.
"""
import abc
import typing
from os import path
import numpy as np
import pint
from openff.evaluator import unit
from openff.evaluator.attributes import UNDEFINED
from openff.evaluator.thermodynamics import ThermodynamicState
from openff.evaluator.utils import statistics, timeseries
from openff.evaluator.utils.statistics import StatisticsArray, bootstrap
from openff.evaluator.workflow import Protocol, workflow_protocol
from openff.evaluator.workflow.attributes import (
InequalityMergeBehaviour,
InputAttribute,
OutputAttribute,
)
[docs]class AveragePropertyProtocol(Protocol, abc.ABC):
"""An abstract base class for protocols which will calculate the
average of a property and its uncertainty via bootstrapping.
"""
bootstrap_iterations = InputAttribute(
docstring="The number of bootstrap iterations to perform.",
type_hint=int,
default_value=250,
merge_behavior=InequalityMergeBehaviour.LargestValue,
)
bootstrap_sample_size = InputAttribute(
docstring="The relative sample size to use for bootstrapping.",
type_hint=float,
default_value=1.0,
merge_behavior=InequalityMergeBehaviour.LargestValue,
)
equilibration_index = OutputAttribute(
docstring="The index in the data set after which the data is stationary.",
type_hint=int,
)
statistical_inefficiency = OutputAttribute(
docstring="The statistical inefficiency in the data set.", type_hint=float
)
value = OutputAttribute(
docstring="The average value and its uncertainty.", type_hint=pint.Measurement
)
uncorrelated_values = OutputAttribute(
docstring="The uncorrelated values which the average was calculated from.",
type_hint=pint.Quantity,
)
def _bootstrap_function(self, **sample_kwargs):
"""The function to perform on the data set being sampled by
bootstrapping.
Parameters
----------
sample_kwargs: dict of str and np.ndarray
A key words dictionary of the bootstrap sample data, where the
sample data is a numpy array of shape=(num_frames, num_dimensions)
with dtype=float.
Returns
-------
float
The result of evaluating the data.
"""
assert len(sample_kwargs) == 1
sample_data = next(iter(sample_kwargs.values()))
return sample_data.mean()
[docs]class AverageTrajectoryProperty(AveragePropertyProtocol, abc.ABC):
"""An abstract base class for protocols which will calculate the
average of a property from a simulation trajectory.
"""
input_coordinate_file = InputAttribute(
docstring="The file path to the starting coordinates of a trajectory.",
type_hint=str,
default_value=UNDEFINED,
)
trajectory_path = InputAttribute(
docstring="The file path to the trajectory to average over.",
type_hint=str,
default_value=UNDEFINED,
)
@workflow_protocol()
class AverageFreeEnergies(Protocol):
"""A protocol which computes the Boltzmann weighted average
(ΔG° = -RT × Log[ Σ_{n} exp(-βΔG°_{n}) ]) of a set of free
energies which were measured at the same thermodynamic state.
Confidence intervals are computed by bootstrapping with replacement.
"""
values = InputAttribute(
docstring="The values to add together.", type_hint=list, default_value=UNDEFINED
)
thermodynamic_state = InputAttribute(
docstring="The thermodynamic state at which the free energies were measured.",
type_hint=ThermodynamicState,
default_value=UNDEFINED,
)
bootstrap_cycles = InputAttribute(
docstring="The number of bootstrap cycles to perform when estimating "
"the uncertainty in the combined free energies.",
type_hint=int,
default_value=2000,
)
result = OutputAttribute(
docstring="The sum of the values.", type_hint=pint.Measurement
)
confidence_intervals = OutputAttribute(
docstring="The 95% confidence intervals on the average free energy.",
type_hint=pint.Quantity,
)
def _execute(self, directory, available_resources):
default_unit = unit.kilocalorie / unit.mole
boltzmann_factor = (
self.thermodynamic_state.temperature * unit.molar_gas_constant
)
boltzmann_factor.ito(default_unit)
beta = 1.0 / boltzmann_factor
cycle_result = np.empty(self.bootstrap_cycles)
for cycle_index, cycle in enumerate(range(self.bootstrap_cycles)):
cycle_values = np.empty(len(self.values))
for value_index, value in enumerate(self.values):
mean = value.value.to(default_unit).magnitude
sem = value.error.to(default_unit).magnitude
sampled_value = np.random.normal(mean, sem) * default_unit
cycle_values[value_index] = (
(-beta * sampled_value).to(unit.dimensionless).magnitude
)
# ΔG° = -RT × Log[ Σ_{n} exp(-βΔG°_{n}) ]
cycle_result[cycle_index] = np.log(np.sum(np.exp(cycle_values)))
mean = np.mean(-boltzmann_factor * cycle_result)
sem = np.std(-boltzmann_factor * cycle_result)
confidence_intervals = np.empty(2)
sorted_statistics = np.sort(cycle_result)
confidence_intervals[0] = sorted_statistics[int(0.025 * self.bootstrap_cycles)]
confidence_intervals[1] = sorted_statistics[int(0.975 * self.bootstrap_cycles)]
confidence_intervals = -boltzmann_factor * confidence_intervals
self.result = mean.plus_minus(sem)
self.confidence_intervals = confidence_intervals
def validate(self, attribute_type=None):
super(AverageFreeEnergies, self).validate(attribute_type)
assert all(
isinstance(x, (unit.Measurement, pint.Measurement)) for x in self.values
)