"""
A collection of density physical property definitions.
"""
import copy
from typing import Dict, Tuple
from openff.evaluator import unit
from openff.evaluator.datasets import PhysicalProperty
from openff.evaluator.layers import register_calculation_schema
from openff.evaluator.layers.simulation import SimulationLayer, SimulationSchema
from openff.evaluator.protocols import (
analysis,
coordinates,
forcefield,
miscellaneous,
openmm,
yank,
)
from openff.evaluator.protocols.paprika.analysis import (
AnalyzeAPRPhase,
ComputeReferenceWork,
ComputeSymmetryCorrection,
)
from openff.evaluator.protocols.paprika.coordinates import (
AddDummyAtoms,
PreparePullCoordinates,
PrepareReleaseCoordinates,
)
from openff.evaluator.protocols.paprika.restraints import (
ApplyRestraints,
GenerateAttachRestraints,
GeneratePullRestraints,
GenerateReleaseRestraints,
)
from openff.evaluator.substances import Component
from openff.evaluator.thermodynamics import Ensemble
from openff.evaluator.workflow.schemas import ProtocolReplicator, WorkflowSchema
from openff.evaluator.workflow.utils import ProtocolPath, ReplicatorValue
[docs]class HostGuestBindingAffinity(PhysicalProperty):
"""A class representation of a host-guest binding affinity property"""
[docs] @classmethod
def default_unit(cls):
return unit.kilojoule / unit.mole
[docs] @staticmethod
def default_yank_schema(existing_schema=None):
"""Returns the default calculation schema to use when estimating
this class of property from direct simulations.
Parameters
----------
existing_schema: SimulationSchema, optional
An existing schema whose settings to use. If set,
the schema's `workflow_schema` will be overwritten
by this method.
Returns
-------
SimulationSchema
The schema to follow when estimating this property.
"""
calculation_schema = SimulationSchema()
if existing_schema is not None:
assert isinstance(existing_schema, SimulationSchema)
calculation_schema = copy.deepcopy(existing_schema)
schema = WorkflowSchema(property_type=HostGuestBindingAffinity.__name__)
schema.id = "{}{}".format(HostGuestBindingAffinity.__name__, "Schema")
# Initial coordinate and topology setup.
filter_ligand = miscellaneous.FilterSubstanceByRole("filter_ligand")
filter_ligand.input_substance = ProtocolPath("substance", "global")
filter_ligand.component_roles = [Component.Role.Ligand]
# We only support substances with a single guest ligand.
filter_ligand.expected_components = 1
schema.protocols[filter_ligand.id] = filter_ligand.schema
# Construct the protocols which will (for now) take as input a set of host coordinates,
# and generate a set of charges for them.
filter_receptor = miscellaneous.FilterSubstanceByRole("filter_receptor")
filter_receptor.input_substance = ProtocolPath("substance", "global")
filter_receptor.component_roles = [Component.Role.Receptor]
# We only support substances with a single host receptor.
filter_receptor.expected_components = 1
schema.protocols[filter_receptor.id] = filter_receptor.schema
# Perform docking to position the guest within the host.
perform_docking = coordinates.BuildDockedCoordinates("perform_docking")
perform_docking.ligand_substance = ProtocolPath(
"filtered_substance", filter_ligand.id
)
perform_docking.receptor_coordinate_file = ProtocolPath(
"receptor_mol2", "global"
)
schema.protocols[perform_docking.id] = perform_docking.schema
# Solvate the docked structure using packmol
filter_solvent = miscellaneous.FilterSubstanceByRole("filter_solvent")
filter_solvent.input_substance = ProtocolPath("substance", "global")
filter_solvent.component_roles = [Component.Role.Solvent]
schema.protocols[filter_solvent.id] = filter_solvent.schema
solvate_complex = coordinates.SolvateExistingStructure("solvate_complex")
solvate_complex.max_molecules = 1000
solvate_complex.substance = ProtocolPath(
"filtered_substance", filter_solvent.id
)
solvate_complex.solute_coordinate_file = ProtocolPath(
"docked_complex_coordinate_path", perform_docking.id
)
schema.protocols[solvate_complex.id] = solvate_complex.schema
# Assign force field parameters to the solvated complex system.
build_solvated_complex_system = forcefield.BaseBuildSystem(
"build_solvated_complex_system"
)
build_solvated_complex_system.force_field_path = ProtocolPath(
"force_field_path", "global"
)
build_solvated_complex_system.coordinate_file_path = ProtocolPath(
"coordinate_file_path", solvate_complex.id
)
build_solvated_complex_system.substance = ProtocolPath("substance", "global")
build_solvated_complex_system.charged_molecule_paths = [
ProtocolPath("receptor_mol2", "global")
]
schema.protocols[
build_solvated_complex_system.id
] = build_solvated_complex_system.schema
# Solvate the ligand using packmol
solvate_ligand = coordinates.SolvateExistingStructure("solvate_ligand")
solvate_ligand.max_molecules = 1000
solvate_ligand.substance = ProtocolPath("filtered_substance", filter_solvent.id)
solvate_ligand.solute_coordinate_file = ProtocolPath(
"docked_ligand_coordinate_path", perform_docking.id
)
schema.protocols[solvate_ligand.id] = solvate_ligand.schema
# Assign force field parameters to the solvated ligand system.
build_solvated_ligand_system = forcefield.BaseBuildSystem(
"build_solvated_ligand_system"
)
build_solvated_ligand_system.force_field_path = ProtocolPath(
"force_field_path", "global"
)
build_solvated_ligand_system.coordinate_file_path = ProtocolPath(
"coordinate_file_path", solvate_ligand.id
)
build_solvated_ligand_system.substance = ProtocolPath("substance", "global")
schema.protocols[
build_solvated_ligand_system.id
] = build_solvated_ligand_system.schema
# Employ YANK to estimate the binding free energy.
yank_protocol = yank.LigandReceptorYankProtocol("yank_protocol")
yank_protocol.thermodynamic_state = ProtocolPath(
"thermodynamic_state", "global"
)
yank_protocol.number_of_iterations = 2000
yank_protocol.steps_per_iteration = 500
yank_protocol.checkpoint_interval = 10
yank_protocol.verbose = True
yank_protocol.force_field_path = ProtocolPath("force_field_path", "global")
yank_protocol.ligand_residue_name = ProtocolPath(
"ligand_residue_name", perform_docking.id
)
yank_protocol.receptor_residue_name = ProtocolPath(
"receptor_residue_name", perform_docking.id
)
yank_protocol.solvated_ligand_coordinates = ProtocolPath(
"coordinate_file_path", solvate_ligand.id
)
yank_protocol.solvated_ligand_system = ProtocolPath(
"parameterized_system", build_solvated_ligand_system.id
)
yank_protocol.solvated_complex_coordinates = ProtocolPath(
"coordinate_file_path", solvate_complex.id
)
yank_protocol.solvated_complex_system = ProtocolPath(
"parameterized_system", build_solvated_complex_system.id
)
schema.protocols[yank_protocol.id] = yank_protocol.schema
# Define where the final values come from.
schema.final_value_source = ProtocolPath(
"free_energy_difference", yank_protocol.id
)
calculation_schema.workflow_schema = schema
return calculation_schema
@classmethod
def _paprika_default_solvation_protocol(
cls, n_solvent_molecules: int
) -> coordinates.SolvateExistingStructure:
"""Returns the default protocol to use for solvating each window
of an APR calculation.
"""
solvation_protocol = coordinates.SolvateExistingStructure("")
solvation_protocol.max_molecules = n_solvent_molecules
solvation_protocol.count_exact_amount = False
solvation_protocol.box_aspect_ratio = [1.0, 1.0, 2.0]
solvation_protocol.center_solute_in_box = False
return solvation_protocol
@classmethod
def _paprika_default_simulation_protocols(
cls,
n_thermalization_steps: int,
n_equilibration_steps: int,
n_production_steps: int,
dt_thermalization: unit.Quantity,
dt_equilibration: unit.Quantity,
dt_production: unit.Quantity,
) -> Tuple[
openmm.OpenMMEnergyMinimisation,
openmm.OpenMMSimulation,
openmm.OpenMMSimulation,
openmm.OpenMMSimulation,
]:
"""Returns the default set of simulation protocols to use for each window
of an APR calculation.
Parameters
----------
n_thermalization_steps
The number of thermalization simulations steps to perform.
Sample generated during this step will be discarded.
n_equilibration_steps
The number of equilibration simulations steps to perform.
Sample generated during this step will be discarded.
n_production_steps
The number of production simulations steps to perform.
Sample generated during this step will be used in the final
free energy calculation.
dt_thermalization
The integration timestep during thermalization
dt_equilibration
The integration timestep during equilibration
dt_production
The integration timestep during production
Returns
-------
A protocol to perform an energy minimization, a thermalization,
an equilibration, and finally a production simulation.
"""
energy_minimisation = openmm.OpenMMEnergyMinimisation("")
thermalization = openmm.OpenMMSimulation("")
thermalization.steps_per_iteration = n_thermalization_steps
thermalization.output_frequency = 10000
thermalization.timestep = dt_thermalization
equilibration = openmm.OpenMMSimulation("")
equilibration.steps_per_iteration = n_equilibration_steps
equilibration.output_frequency = 10000
equilibration.timestep = dt_equilibration
production = openmm.OpenMMSimulation("")
production.steps_per_iteration = n_production_steps
production.output_frequency = 5000
production.timestep = dt_production
return energy_minimisation, thermalization, equilibration, production
@classmethod
def _paprika_build_simulation_protocols(
cls,
coordinate_path: ProtocolPath,
parameterized_system: ProtocolPath,
id_prefix: str,
id_suffix: str,
minimization_template: openmm.OpenMMEnergyMinimisation,
thermalization_template: openmm.OpenMMSimulation,
equilibration_template: openmm.OpenMMSimulation,
production_template: openmm.OpenMMSimulation,
) -> Tuple[
openmm.OpenMMEnergyMinimisation,
openmm.OpenMMSimulation,
openmm.OpenMMSimulation,
openmm.OpenMMSimulation,
]:
minimization = copy.deepcopy(minimization_template)
minimization.id = f"{id_prefix}_energy_minimization_{id_suffix}"
minimization.input_coordinate_file = coordinate_path
minimization.parameterized_system = parameterized_system
thermalization = copy.deepcopy(thermalization_template)
thermalization.id = f"{id_prefix}_thermalization_{id_suffix}"
thermalization.input_coordinate_file = ProtocolPath(
"output_coordinate_file", minimization.id
)
thermalization.parameterized_system = parameterized_system
thermalization.thermodynamic_state = ProtocolPath(
"thermodynamic_state", "global"
)
equilibration = copy.deepcopy(equilibration_template)
equilibration.id = f"{id_prefix}_equilibration_{id_suffix}"
equilibration.input_coordinate_file = ProtocolPath(
"output_coordinate_file", thermalization.id
)
equilibration.parameterized_system = parameterized_system
equilibration.thermodynamic_state = ProtocolPath(
"thermodynamic_state", "global"
)
production = copy.deepcopy(production_template)
production.id = f"{id_prefix}_production_{id_suffix}"
production.input_coordinate_file = ProtocolPath(
"output_coordinate_file", equilibration.id
)
production.parameterized_system = parameterized_system
production.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
return minimization, thermalization, equilibration, production
@classmethod
def _paprika_build_attach_pull_protocols(
cls,
orientation_replicator: ProtocolReplicator,
restraint_schemas: Dict[str, ProtocolPath],
solvation_template: coordinates.SolvateExistingStructure,
minimization_template: openmm.OpenMMEnergyMinimisation,
thermalization_template: openmm.OpenMMSimulation,
equilibration_template: openmm.OpenMMSimulation,
production_template: openmm.OpenMMSimulation,
):
# Define a replicator to set and solvate up the coordinates for each pull window
orientation_placeholder = orientation_replicator.placeholder_id
pull_replicator = ProtocolReplicator(
f"pull_replicator_{orientation_placeholder}"
)
pull_replicator.template_values = ProtocolPath("pull_windows_indices", "global")
pull_replicator_id = (
f"{pull_replicator.placeholder_id}_" f"{orientation_placeholder}"
)
attach_replicator = ProtocolReplicator(
f"attach_replicator_{orientation_placeholder}"
)
attach_replicator.template_values = ProtocolPath(
"attach_windows_indices", "global"
)
attach_replicator_id = (
f"{attach_replicator.placeholder_id}_" f"{orientation_placeholder}"
)
# Filter out only the solvent substance to help with the solvation step.
filter_solvent = miscellaneous.FilterSubstanceByRole(
"host-guest-filter_solvent"
)
filter_solvent.input_substance = ProtocolPath("substance", "global")
filter_solvent.component_roles = [Component.Role.Solvent]
# Define the protocols which will set and solvate up the coordinates for each
# pull window
align_coordinates = PreparePullCoordinates(
f"pull_align_coordinates_{pull_replicator_id}"
)
align_coordinates.substance = ProtocolPath("substance", "global")
align_coordinates.complex_file_path = ProtocolPath(
f"guest_orientations[{orientation_placeholder}].coordinate_path", "global"
)
align_coordinates.guest_orientation_mask = ProtocolPath(
"guest_orientation_mask", "global"
)
align_coordinates.pull_window_index = ReplicatorValue(pull_replicator.id)
align_coordinates.pull_distance = ProtocolPath("pull_distance", "global")
align_coordinates.n_pull_windows = ProtocolPath("n_pull_windows", "global")
solvate_coordinates = copy.deepcopy(solvation_template)
solvate_coordinates.id = f"pull_solvate_coordinates_{pull_replicator_id}"
solvate_coordinates.substance = ProtocolPath(
"filtered_substance", filter_solvent.id
)
solvate_coordinates.solute_coordinate_file = ProtocolPath(
"output_coordinate_path", align_coordinates.id
)
# Apply the force field parameters. This only needs to be done once.
apply_parameters = forcefield.BuildSmirnoffSystem(
f"pull_apply_parameters_{orientation_placeholder}"
)
apply_parameters.force_field_path = ProtocolPath("force_field_path", "global")
apply_parameters.substance = ProtocolPath("substance", "global")
apply_parameters.coordinate_file_path = ProtocolPath(
"coordinate_file_path",
f"pull_solvate_coordinates_0_{orientation_placeholder}",
)
# Add the dummy atoms.
add_dummy_atoms = AddDummyAtoms(f"pull_add_dummy_atoms_{pull_replicator_id}")
add_dummy_atoms.substance = ProtocolPath("substance", "global")
add_dummy_atoms.input_coordinate_path = ProtocolPath(
"coordinate_file_path", solvate_coordinates.id
)
add_dummy_atoms.input_system = ProtocolPath(
"parameterized_system", apply_parameters.id
)
add_dummy_atoms.offset = ProtocolPath("dummy_atom_offset", "global")
attach_coordinate_path = ProtocolPath(
"output_coordinate_path",
f"pull_add_dummy_atoms_0_{orientation_placeholder}",
)
attach_system = ProtocolPath(
"output_system", f"pull_add_dummy_atoms_0_{orientation_placeholder}"
)
# Apply the attach restraints
generate_attach_restraints = GenerateAttachRestraints(
f"attach_generate_restraints_{orientation_placeholder}"
)
generate_attach_restraints.complex_coordinate_path = attach_coordinate_path
generate_attach_restraints.attach_lambdas = ProtocolPath(
"attach_lambdas", "global"
)
generate_attach_restraints.restraint_schemas = restraint_schemas
apply_attach_restraints = ApplyRestraints(
f"attach_apply_restraints_{attach_replicator_id}"
)
apply_attach_restraints.restraints_path = ProtocolPath(
"restraints_path", generate_attach_restraints.id
)
apply_attach_restraints.phase = "attach"
apply_attach_restraints.window_index = ReplicatorValue(attach_replicator.id)
apply_attach_restraints.input_system = attach_system
# Apply the pull restraints
generate_pull_restraints = GeneratePullRestraints(
f"pull_generate_restraints_{orientation_placeholder}"
)
generate_pull_restraints.complex_coordinate_path = attach_coordinate_path
generate_pull_restraints.attach_lambdas = ProtocolPath(
"attach_lambdas", "global"
)
generate_pull_restraints.n_pull_windows = ProtocolPath(
"n_pull_windows", "global"
)
generate_pull_restraints.restraint_schemas = restraint_schemas
apply_pull_restraints = ApplyRestraints(
f"pull_apply_restraints_{pull_replicator_id}"
)
apply_pull_restraints.restraints_path = ProtocolPath(
"restraints_path", generate_pull_restraints.id
)
apply_pull_restraints.phase = "pull"
apply_pull_restraints.window_index = ReplicatorValue(pull_replicator.id)
apply_pull_restraints.input_system = ProtocolPath(
"output_system", add_dummy_atoms.id
)
# Setup the simulations for the attach and pull phases.
(
attach_minimization,
attach_thermalization,
attach_equilibration,
attach_production,
) = cls._paprika_build_simulation_protocols(
attach_coordinate_path,
ProtocolPath("output_system", apply_attach_restraints.id),
"attach",
attach_replicator_id,
minimization_template,
thermalization_template,
equilibration_template,
production_template,
)
(
pull_minimization,
pull_thermalization,
pull_equilibration,
pull_production,
) = cls._paprika_build_simulation_protocols(
ProtocolPath("output_coordinate_path", add_dummy_atoms.id),
ProtocolPath("output_system", apply_pull_restraints.id),
"pull",
pull_replicator_id,
minimization_template,
thermalization_template,
equilibration_template,
production_template,
)
# Analyze the attach phase.
attach_free_energy = AnalyzeAPRPhase(
f"analyze_attach_phase_{orientation_placeholder}"
)
attach_free_energy.topology_path = attach_coordinate_path
attach_free_energy.trajectory_paths = ProtocolPath(
"trajectory_file_path", attach_production.id
)
attach_free_energy.phase = "attach"
attach_free_energy.restraints_path = ProtocolPath(
"restraints_path", generate_attach_restraints.id
)
# Analyze the pull phase.
pull_free_energy = AnalyzeAPRPhase(
f"analyze_pull_phase_{orientation_placeholder}"
)
pull_free_energy.topology_path = attach_coordinate_path
pull_free_energy.trajectory_paths = ProtocolPath(
"trajectory_file_path", pull_production.id
)
pull_free_energy.phase = "pull"
pull_free_energy.restraints_path = ProtocolPath(
"restraints_path", generate_pull_restraints.id
)
reference_state_work = ComputeReferenceWork(
f"pull_reference_work_{orientation_placeholder}"
)
reference_state_work.thermodynamic_state = ProtocolPath(
"thermodynamic_state", "global"
)
reference_state_work.restraints_path = ProtocolPath(
"restraints_path", generate_pull_restraints.id
)
# Return the full list of protocols which make up the attach and pull parts
# of a host-guest APR calculation.
protocols = [
filter_solvent,
align_coordinates,
solvate_coordinates,
apply_parameters,
add_dummy_atoms,
generate_attach_restraints,
apply_attach_restraints,
generate_pull_restraints,
apply_pull_restraints,
attach_minimization,
attach_thermalization,
attach_equilibration,
attach_production,
pull_minimization,
pull_thermalization,
pull_equilibration,
pull_production,
attach_free_energy,
pull_free_energy,
reference_state_work,
]
protocol_replicators = [pull_replicator, attach_replicator]
return (
protocols,
protocol_replicators,
ProtocolPath("result", attach_free_energy.id),
ProtocolPath("result", pull_free_energy.id),
ProtocolPath("result", reference_state_work.id),
)
@classmethod
def _paprika_build_release_protocols(
cls,
orientation_replicator: ProtocolReplicator,
restraint_schemas: Dict[str, ProtocolPath],
solvation_template: coordinates.SolvateExistingStructure,
minimization_template: openmm.OpenMMEnergyMinimisation,
thermalization_template: openmm.OpenMMSimulation,
equilibration_template: openmm.OpenMMSimulation,
production_template: openmm.OpenMMSimulation,
):
# Define a replicator to set up each release window
release_replicator = ProtocolReplicator("release_replicator")
release_replicator.template_values = ProtocolPath(
"release_windows_indices", "global"
)
orientation_placeholder = orientation_replicator.placeholder_id
release_replicator_id = (
f"{release_replicator.placeholder_id}_" f"{orientation_placeholder}"
)
# Filter out only the solvent substance to help with the solvation step.
filter_solvent = miscellaneous.FilterSubstanceByRole("host-filter_solvent")
filter_solvent.input_substance = ProtocolPath("host_substance", "global")
filter_solvent.component_roles = [Component.Role.Solvent]
# Construct a set of coordinates for a host molecule correctly
# aligned to the z-axis.
align_coordinates = PrepareReleaseCoordinates("release_align_coordinates")
align_coordinates.substance = ProtocolPath("host_substance", "global")
align_coordinates.complex_file_path = ProtocolPath(
"host_coordinate_path", "global"
)
solvate_coordinates = copy.deepcopy(solvation_template)
solvate_coordinates.id = "release_solvate_coordinates"
solvate_coordinates.substance = ProtocolPath(
"filtered_substance", filter_solvent.id
)
solvate_coordinates.solute_coordinate_file = ProtocolPath(
"output_coordinate_path", align_coordinates.id
)
# Apply the force field parameters. This only needs to be done for one
# of the windows.
apply_parameters = forcefield.BaseBuildSystem("release_apply_parameters")
apply_parameters.force_field_path = ProtocolPath("force_field_path", "global")
apply_parameters.substance = ProtocolPath("host_substance", "global")
apply_parameters.coordinate_file_path = ProtocolPath(
"coordinate_file_path", solvate_coordinates.id
)
# Add the dummy atoms.
add_dummy_atoms = AddDummyAtoms("release_add_dummy_atoms")
add_dummy_atoms.substance = ProtocolPath("host_substance", "global")
add_dummy_atoms.input_coordinate_path = ProtocolPath(
"coordinate_file_path",
solvate_coordinates.id,
)
add_dummy_atoms.input_system = ProtocolPath(
"parameterized_system", apply_parameters.id
)
add_dummy_atoms.offset = ProtocolPath("dummy_atom_offset", "global")
# Apply the restraints files
generate_restraints = GenerateReleaseRestraints(
f"release_generate_restraints_{orientation_placeholder}"
)
generate_restraints.host_coordinate_path = ProtocolPath(
"output_coordinate_path", add_dummy_atoms.id
)
generate_restraints.release_lambdas = ProtocolPath("release_lambdas", "global")
generate_restraints.restraint_schemas = restraint_schemas
apply_restraints = ApplyRestraints(
f"release_apply_restraints_{release_replicator_id}"
)
apply_restraints.restraints_path = ProtocolPath(
"restraints_path", generate_restraints.id
)
apply_restraints.phase = "release"
apply_restraints.window_index = ReplicatorValue(release_replicator.id)
apply_restraints.input_system = ProtocolPath(
"output_system", add_dummy_atoms.id
)
# Setup the simulations for the release phase.
(
release_minimization,
release_thermalization,
release_equilibration,
release_production,
) = cls._paprika_build_simulation_protocols(
ProtocolPath("output_coordinate_path", add_dummy_atoms.id),
ProtocolPath("output_system", apply_restraints.id),
"release",
release_replicator_id,
minimization_template,
thermalization_template,
equilibration_template,
production_template,
)
# Analyze the release phase.
analyze_release_phase = AnalyzeAPRPhase(
f"analyze_release_phase_{orientation_placeholder}"
)
analyze_release_phase.topology_path = ProtocolPath(
"output_coordinate_path", add_dummy_atoms.id
)
analyze_release_phase.trajectory_paths = ProtocolPath(
"trajectory_file_path", release_production.id
)
analyze_release_phase.phase = "release"
analyze_release_phase.restraints_path = ProtocolPath(
"restraints_path", generate_restraints.id
)
# Return the full list of protocols which make up the release parts
# of a host-guest APR calculation.
protocols = [
filter_solvent,
align_coordinates,
solvate_coordinates,
apply_parameters,
add_dummy_atoms,
generate_restraints,
apply_restraints,
release_minimization,
release_thermalization,
release_equilibration,
release_production,
analyze_release_phase,
]
return (
protocols,
release_replicator,
ProtocolPath("result", analyze_release_phase.id),
)
[docs] @classmethod
def default_paprika_schema(
cls,
existing_schema: SimulationSchema = None,
n_solvent_molecules: int = 2500,
n_thermalization_steps: int = 50000,
n_equilibration_steps: int = 200000,
n_production_steps: int = 2500000,
dt_thermalization: unit.Quantity = 1.0 * unit.femtosecond,
dt_equilibration: unit.Quantity = 2.0 * unit.femtosecond,
dt_production: unit.Quantity = 2.0 * unit.femtosecond,
debug: bool = False,
):
"""Returns the default calculation schema to use when estimating
a host-guest binding affinity measurement with an APR calculation
using the ``paprika`` package.
Notes
-----
* This schema requires additional metadata to be able to estimate
each metadata. This metadata is automatically generated for properties
loaded from the ``taproom`` package using the ``TaproomDataSet`` object.
Parameters
----------
existing_schema: SimulationSchema, optional
An existing schema whose settings to use. If set,
the schema's `workflow_schema` will be overwritten
by this method.
n_solvent_molecules
The number of solvent molecules to add to the box.
n_thermalization_steps
The number of thermalization simulations steps to perform.
Sample generated during this step will be discarded.
n_equilibration_steps
The number of equilibration simulations steps to perform.
Sample generated during this step will be discarded.
n_production_steps
The number of production simulations steps to perform.
Sample generated during this step will be used in the final
free energy calculation.
dt_thermalization
The integration timestep during thermalization
dt_equilibration
The integration timestep during equilibration
dt_production
The integration timestep during production
debug
Whether to return a debug schema. This is nearly identical
to the default schema, albeit with significantly less
solvent molecules (10), all simulations run in NVT and much
shorter simulation runs (500 steps). If True, the other input
arguments will be ignored.
Returns
-------
SimulationSchema
The schema to follow when estimating this property.
"""
calculation_schema = SimulationSchema()
if existing_schema is not None:
assert isinstance(existing_schema, SimulationSchema)
calculation_schema = copy.deepcopy(existing_schema)
# Initialize the protocols which will serve as templates for those
# used in the actual workflows.
solvation_template = cls._paprika_default_solvation_protocol(
n_solvent_molecules=n_solvent_molecules
)
(
minimization_template,
*simulation_templates,
) = cls._paprika_default_simulation_protocols(
n_thermalization_steps=n_thermalization_steps,
n_equilibration_steps=n_equilibration_steps,
n_production_steps=n_production_steps,
dt_thermalization=dt_thermalization,
dt_equilibration=dt_equilibration,
dt_production=dt_production,
)
if debug:
solvation_template.max_molecules = 10
solvation_template.mass_density = 0.01 * unit.grams / unit.milliliters
for simulation_template in simulation_templates:
simulation_template.ensemble = Ensemble.NVT
simulation_template.steps_per_iteration = 500
simulation_template.output_frequency = 50
# Set up a replicator which will perform the attach-pull calculation for
# each of the guest orientations
orientation_replicator = ProtocolReplicator("orientation_replicator")
orientation_replicator.template_values = ProtocolPath(
"guest_orientations", "global"
)
restraint_schemas = {
"static": ProtocolPath(
f"guest_orientations[{orientation_replicator.placeholder_id}]."
f"static_restraints",
"global",
),
"conformational": ProtocolPath(
f"guest_orientations[{orientation_replicator.placeholder_id}]."
f"conformational_restraints",
"global",
),
"guest": ProtocolPath("guest_restraints", "global"),
"wall": ProtocolPath("wall_restraints", "global"),
"symmetry": ProtocolPath("symmetry_restraints", "global"),
}
# Build the protocols to compute the attach and pull free energies.
(
attach_pull_protocols,
attach_pull_replicators,
attach_free_energy,
pull_free_energy,
reference_work,
) = cls._paprika_build_attach_pull_protocols(
orientation_replicator,
restraint_schemas,
solvation_template,
minimization_template,
*simulation_templates,
)
# Build the protocols to compute the release free energies.
(
release_protocols,
release_replicator,
release_free_energy,
) = cls._paprika_build_release_protocols(
orientation_replicator,
restraint_schemas,
solvation_template,
minimization_template,
*simulation_templates,
)
# Compute the symmetry correction.
symmetry_correction = ComputeSymmetryCorrection("symmetry_correction")
symmetry_correction.n_microstates = ProtocolPath(
"n_guest_microstates", "global"
)
symmetry_correction.thermodynamic_state = ProtocolPath(
"thermodynamic_state", "global"
)
# Sum together the free energies of the individual orientations
orientation_free_energy = miscellaneous.AddValues(
f"orientation_free_energy_{orientation_replicator.placeholder_id}"
)
orientation_free_energy.values = [
attach_free_energy,
pull_free_energy,
reference_work,
release_free_energy,
ProtocolPath("result", symmetry_correction.id),
]
# Finally, combine all of the values together
total_free_energy = analysis.AverageFreeEnergies("total_free_energy")
total_free_energy.values = ProtocolPath("result", orientation_free_energy.id)
total_free_energy.thermodynamic_state = ProtocolPath(
"thermodynamic_state", "global"
)
calculation_schema.workflow_schema = WorkflowSchema()
calculation_schema.workflow_schema.protocol_schemas = [
*(protocol.schema for protocol in attach_pull_protocols),
*(protocol.schema for protocol in release_protocols),
symmetry_correction.schema,
orientation_free_energy.schema,
total_free_energy.schema,
]
calculation_schema.workflow_schema.protocol_replicators = [
orientation_replicator,
*attach_pull_replicators,
release_replicator,
]
# Define where the final value comes from.
calculation_schema.workflow_schema.final_value_source = ProtocolPath(
"result", total_free_energy.id
)
return calculation_schema
register_calculation_schema(
HostGuestBindingAffinity,
SimulationLayer,
HostGuestBindingAffinity.default_paprika_schema,
)