Download Notebook View in GitHub Open in Google Colab
Solvating and equilibrating a ligand in a box of water
import time
import mdtraj
import nglview
import numpy
import openmm
import openmm.app
import openmm.unit
from openff.toolkit import ForceField, Molecule, Topology
from openff.units import unit
from rich.pretty import pprint
from openff.interchange import Interchange
from openff.interchange.components._packmol import RHOMBIC_DODECAHEDRON, pack_box
from openff.interchange.interop.openmm import to_openmm_positions
Construct the topology
In this example we’ll construct a topology consisting of one ligand in a rhombic dodecahedral box with 2100 water molecules. We’ll use a mapped SMILES when creating Molecule
objects to ensure the atom ordering matches. (Atom ordering is not strictly a part of SMILES and therefore liable to be changed with updates to RDKit.)
This can be extended or modified by i.e.
Replacing this sample ligand with a different ligand of interest - substitute out the ligand SMILES
Using a different number of water molecules - substitute out the
2100
used belowAdding ions or co-solvents into the box - add more
Molecule
object as desired
ligand = Molecule.from_mapped_smiles(
"[H:7][C@:6]1([C:13](=[C:11]([C:9](=[O:10])[O:8]1)[O:12][H:19])[O:14][H:20])[C@:3]([H:4])([C:2]([H:16])([H:17])[O:1][H:15])[O:5][H:18]"
)
water = Molecule.from_mapped_smiles("[H:2][O:1][H:3]")
There are a few ways to convert the information in this trajectory to an Openff Topology
object. Since we already know how many of which molecules we want, we’ll use a PACKMOL wrapper shipped with Interchange. The Topology
object returned by pack_box
contains the ligand, 1000 copies of water, the box vectors we asked for, and the positions generated by PACKMOL.
topology = pack_box(
molecules=[ligand, water],
number_of_copies=[1, 1000],
box_vectors=3.5 * RHOMBIC_DODECAHEDRON * unit.nanometer,
)
topology.n_molecules, topology.box_vectors, topology.get_positions().shape
(1001,
array([[3.5 , 0. , 0. ],
[0. , 3.5 , 0. ],
[1.75 , 1.75 , 2.47487373]]) <Unit('nanometer')>,
(3020, 3))
The “Sage” force field line (version 2.x.x) includes TIP3P parameters for water, so we don’t need to use multiple force fields to parametrize this topology. (One could use a different water model provided they accept the risks of using a different one than the force field was optimized with.)
Note that the “Parsley” (version 1.x.x) line did not include TIP3P parameters, so loading in an extra force field was required.
sage = ForceField("openff-2.0.0.offxml")
From here, we can create an Interchange
object, which stores the results of applying the force field to the topology. Since the Topology
object contained positions and box vectors, we don’t need to set them again - they’re already set on the Interchange
object!
interchange: Interchange = Interchange.from_smirnoff(
force_field=sage, topology=topology
)
interchange.topology.n_atoms, interchange.box, interchange.positions.shape
(3020,
array([[3.5 , 0. , 0. ],
[0. , 3.5 , 0. ],
[1.75 , 1.75 , 2.47487373]], dtype='>f8') <Unit('nanometer')>,
(3020, 3))
Now, we can prepare everything that OpenMM needs to run and report a brief equilibration simulation:
A
Simulation
object containingAn
openmm.System
A topology in OpenMM’s object model (
openmm.app.Topology
)Positions and box vectors in OpenMM’s unit solution (
openmm.unit.Quantity
)
A barostat, since we want to use NPT dynamics to relax the box size toward equilibrium
An integrator
Reporters for the trajectory and simulation data
For convenience, let’s wrap some boilerplate code into a function that can be called again later with different inputs.
def create_simulation(
interchange: Interchange,
pdb_stride: int = 500,
trajectory_name: str = "trajectory.pdb",
) -> openmm.app.Simulation:
integrator = openmm.LangevinIntegrator(
300 * openmm.unit.kelvin,
1 / openmm.unit.picosecond,
1 * openmm.unit.femtoseconds,
)
barostat = openmm.MonteCarloBarostat(
1.0 * openmm.unit.bar, 293.15 * openmm.unit.kelvin, 25
)
simulation = interchange.to_openmm_simulation(
combine_nonbonded_forces=True,
integrator=integrator,
)
simulation.system.addForce(barostat)
# https://github.com/openmm/openmm/wiki/Frequently-Asked-Questions#why-does-it-ignore-changes-i-make-to-a-system-or-force
simulation.context.reinitialize(preserveState=True)
# https://github.com/openmm/openmm/issues/3736#issuecomment-1217250635
simulation.minimizeEnergy()
simulation.context.setVelocitiesToTemperature(300 * openmm.unit.kelvin)
simulation.context.computeVirtualSites()
pdb_reporter = openmm.app.PDBReporter(trajectory_name, pdb_stride)
state_data_reporter = openmm.app.StateDataReporter(
"data.csv",
10,
step=True,
potentialEnergy=True,
temperature=True,
density=True,
)
simulation.reporters.append(pdb_reporter)
simulation.reporters.append(state_data_reporter)
return simulation
simulation = create_simulation(interchange)
Finally, we can run this simulation. This should take approximately 10-20 seconds on a laptop or small workstation.
Again, let’s wrap this up into a function to avoid copy-pasting code.
def run_simulation(simulation: openmm.app.Simulation, n_steps: int = 5000):
print("Starting simulation")
start_time = time.process_time()
print("Step, volume (nm^3)")
for step in range(n_steps):
simulation.step(1)
if step % 500 == 0:
box_vectors = simulation.context.getState().getPeriodicBoxVectors()
print(step, numpy.linalg.det(box_vectors._value).round(3))
end_time = time.process_time()
print(f"Elapsed time: {(end_time - start_time):.2f} seconds")
run_simulation(simulation)
Starting simulation
Step, volume (nm^3)
0 30.317
500 31.062
1000 30.753
1500 30.53
2000 30.875
2500 30.84
3000 30.502
3500 30.42
4000 30.291
4500 30.815
Elapsed time: 75.55 seconds
Appendix A: visualizing the trajectory
If NGLView is installed, we can use it and MDTraj to load and visualize the PDB trajectory:
view = nglview.show_mdtraj(mdtraj.load("trajectory.pdb"))
view.add_representation("licorice", selection="water")
view
Appendix B: using the OPC water model
The openff-forcefields
package now includes some common water models. After release 2023.05.1, this includes OPC, a 4-site water model from the Amber community. We can load it by simply passing it to the ForceField
constructor after Sage. (When loading multiple force fields like this, parameters in each section are appended in order of the files.) We can inspect the virtual site parameter in our new in-memory force field.
force_field_opc = ForceField("openff_unconstrained-2.0.0.offxml", "opc.offxml")
pprint(force_field_opc["VirtualSites"].parameters[0].to_dict())
{ │ 'smirks': '[#1:2]-[#8X2H2+0:1]-[#1:3]', │ 'epsilon': <Quantity(0.0, 'kilocalorie_per_mole')>, │ 'type': 'DivalentLonePair', │ 'match': 'once', │ 'distance': <Quantity(-0.15939833, 'angstrom')>, │ 'outOfPlaneAngle': <Quantity(0.0, 'degree')>, │ 'inPlaneAngle': None, │ 'charge_increment1': <Quantity(0.0, 'elementary_charge')>, │ 'charge_increment2': <Quantity(0.679142, 'elementary_charge')>, │ 'charge_increment3': <Quantity(0.679142, 'elementary_charge')>, │ 'rmin_half': <Quantity(1.0, 'angstrom')>, │ 'name': 'EP' }
We can also get a rough visualization of a single water molecule including the virtual site.
water.generate_conformers(n_conformers=1)
view = force_field_opc.create_interchange(water.to_topology()).visualize(
include_virtual_sites=True
)
view.clear_representations()
view.add_representation("ball+stick", aspectRatio=1.5)
view
Since we want to use a different force field with the same chemical topology - a ligand in a box of water - we can re-use the same Topology
object we prepared earlier, re-using the same functions we defined above!
interchange_opc = Interchange.from_smirnoff(
force_field=force_field_opc,
topology=topology,
)
interchange_opc.to_openmm()
<openmm.openmm.System; proxy of <Swig Object of type 'OpenMM::System *' at 0x7f5004963bd0> >
simulation = create_simulation(interchange_opc, trajectory_name="trajectory_opc.pdb")
run_simulation(simulation)
Starting simulation
Step, volume (nm^3)
0 30.317
500 30.02
1000 29.997
1500 29.454
2000 29.248
2500 29.351
3000 29.528
3500 29.387
4000 29.635
4500 29.498
Elapsed time: 112.87 seconds
# NGLview likes to infer bonds between virtual sites in ways that look messy,
# but you can flip this to `True` just to ensure they're there
show_virtual_sites = False
trajectory = mdtraj.load("trajectory_opc.pdb")
if show_virtual_sites:
view = nglview.show_mdtraj(trajectory)
else:
import numpy
trajectory = mdtraj.load("trajectory_opc.pdb")
atom_indices = numpy.where(
[a.element.mass > 0.0 for a in trajectory[0].topology.atoms]
)[0]
view = nglview.show_mdtraj(trajectory.atom_slice(atom_indices))
view.add_representation("licorice", selection="water")
view