Building a variety of different virtual sites

In this example, several different types of SMIRNOFF virtual sites are used to model different chemistries, including

  • 4-site water

  • 5-site water

  • sigma holes

  • lone pairs of oxygens planar carbonyls

  • lone pairs of nitrogens in ammonia

from openff.toolkit import ForceField

sage_with_example_virtual_sites = ForceField(
    "openff-2.1.0.offxml",
    "example-vsites-parameters-forcefield.offxml",
)
import mdtraj
import nglview
import openmm
import openmm.unit
from openff.toolkit import Molecule


def viz(
    smiles: str,
    force_field: ForceField = sage_with_example_virtual_sites,
) -> nglview.NGLWidget:
    """Visualize a molecule with virtual sites."""
    molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True)
    molecule.generate_conformers(n_conformers=1)

    interchange = force_field.create_interchange(molecule.to_topology())

    return interchange._visualize_nglview(include_virtual_sites=True)


def run(
    smiles: str,
    name: str,
    force_field: ForceField = sage_with_example_virtual_sites,
) -> nglview.NGLWidget:
    """Run and visualize a short simulation of a molecule with virtual sites."""
    molecule = Molecule.from_smiles(smiles, allow_undefined_stereo=True)
    molecule.generate_conformers(n_conformers=1)

    interchange = force_field.create_interchange(molecule.to_topology())

    # interchange.minimize()

    integrator = openmm.LangevinIntegrator(
        300 * openmm.unit.kelvin,
        1 / openmm.unit.picosecond,
        1 * openmm.unit.femtoseconds,
    )

    simulation = interchange.to_openmm_simulation(integrator=integrator)

    dcd_reporter = openmm.app.DCDReporter("trajectory.dcd", 10)
    simulation.reporters.append(dcd_reporter)

    simulation.context.computeVirtualSites()
    simulation.minimizeEnergy()
    simulation.context.setVelocitiesToTemperature(openmm.unit.kelvin * 300)
    simulation.step(1000)

    interchange.positions = simulation.context.getState(
        getPositions=True
    ).getPositions()

    open(f"{name}.xml", "w").write(
        openmm.XmlSerializer.serialize(interchange.to_openmm())
    )

    return nglview.show_mdtraj(
        mdtraj.load(
            "trajectory.dcd",
            top=mdtraj.Topology.from_openmm(
                interchange.to_openmm_topology(),
            ),
        )
    )

The first parameter is of type BondCharge, which places a virtual site along the axis of a bond. This was originally intended for use with halogens to better model sigma holes. In this case, a virtual site is added \(1.45 \AA\) “outside” a carbon-chlorine bond.

sage_with_example_virtual_sites["VirtualSites"].get_parameter({"name": "sigma_hole"})[
    0
].to_dict()
{'smirks': '[#6:2]-[#17X1:1]',
 'epsilon': 0.0 <Unit('kilojoules_per_mole')>,
 'type': 'BondCharge',
 'match': 'all_permutations',
 'distance': 1.45 <Unit('angstrom')>,
 'outOfPlaneAngle': None,
 'inPlaneAngle': None,
 'charge_increment1': -0.063 <Unit('elementary_charge')>,
 'charge_increment2': 0.0 <Unit('elementary_charge')>,
 'sigma': 0.0 <Unit('angstrom')>,
 'name': 'sigma_hole'}
viz("CCCCCl")
run("CCCCCl", name="sigma_hole")

Next is a parameter using MonovalentLonePair. In this case, a virtual site is added near the oxygen in a carbonyl. It is placed at an angle (inPlaneAngle) formed by the oxygen and carbon atoms of the carbonyl and in the plane defined by those atoms and the alpha carbon. If the parameter outOfPlaneAngle were non-zero, it would be placed at an angle out of that plane.

sage_with_example_virtual_sites["VirtualSites"].get_parameter(
    {"name": "planar_carbonyl"}
)[0].to_dict()
{'smirks': '[#8:1]=[#6X3+0:2]-[#6:3]',
 'epsilon': 0.1 <Unit('kilocalories_per_mole')>,
 'type': 'MonovalentLonePair',
 'match': 'all_permutations',
 'distance': 0.5 <Unit('angstrom')>,
 'outOfPlaneAngle': 0 <Unit('degree')>,
 'inPlaneAngle': 110 <Unit('degree')>,
 'charge_increment1': 0.1 <Unit('elementary_charge')>,
 'charge_increment2': 0.0 <Unit('elementary_charge')>,
 'charge_increment3': 0.0 <Unit('elementary_charge')>,
 'sigma': 0.05 <Unit('angstrom')>,
 'name': 'planar_carbonyl'}
viz("COC1=C(C=CC(=C1)C=O)O")
run("COC1=C(C=CC(=C1)C=O)O", name="planar_carbonyl")

The next parameter completes a so-called four-site water model like TIP4P or OPC. A single virtual site is placed near the oxygen in the plane of the three atoms. This is implemented with the type DivalentLonePair, though it is possible to also implement it with MonovalentLonePair.

sage_with_example_virtual_sites["VirtualSites"].get_parameter({"name": "4_site_water"})[
    0
].to_dict()
{'smirks': '[#1:2]-[#8X2H2+0:1]-[#1:3]',
 'epsilon': 0.6 <Unit('kilojoules_per_mole')>,
 'type': 'DivalentLonePair',
 'match': 'once',
 'distance': -0.15 <Unit('angstrom')>,
 'outOfPlaneAngle': 0 <Unit('degree')>,
 'inPlaneAngle': None,
 'charge_increment1': 0.0 <Unit('elementary_charge')>,
 'charge_increment2': 1 <Unit('elementary_charge')>,
 'charge_increment3': 1 <Unit('elementary_charge')>,
 'sigma': 0.7 <Unit('angstrom')>,
 'name': '4_site_water'}
run("O", name="4_site_water")
viz("O")

There are also 5-site water models, such as TIP5P. These are more complex as they require placing virtual sites outside the plane formed by the atoms in the molecule. To avoid squeezing two water models into a single force field, this portion uses a force field from a different file.

tip5p = ForceField("tip5p.offxml")

tip5p["VirtualSites"].get_parameter({"name": "5_site_water"})[0].to_dict()
{'smirks': '[#1:2]-[#8X2H2+0:1]-[#1:3]',
 'epsilon': 0.0 <Unit('kilocalorie_per_mole')>,
 'type': 'DivalentLonePair',
 'match': 'all_permutations',
 'distance': 0.07 <Unit('nanometer')>,
 'outOfPlaneAngle': 54.735 <Unit('degree')>,
 'inPlaneAngle': None,
 'charge_increment1': 0.0 <Unit('elementary_charge')>,
 'charge_increment2': 0.1205 <Unit('elementary_charge')>,
 'charge_increment3': 0.1205 <Unit('elementary_charge')>,
 'sigma': 10.0 <Unit('angstrom')>,
 'name': '5_site_water'}

The next parameter uses TrivalentLonePair to model the lone pair of a trivalent nitrogen. It is written to match only ammonia - the other capturing atoms are all hydrogens - but a SMIRKS pattern could be written to match trivalent nitrogens in other chemistries. A virtual site is placed \(5 \AA\) away from the nitrogen atom, opposite a plane defined by the three hydrogen atoms. (You may need to rotate the molecule, using your cursor, to see the virtual site).

viz("O", force_field=tip5p)
run("O", force_field=tip5p, name="5_site_water")
sage_with_example_virtual_sites["VirtualSites"].get_parameter(
    {"name": "trivalent_nitrogen"}
)[0].to_dict()
{'smirks': '[#1:2][#7:1]([#1:3])[#1:4]',
 'epsilon': 0.5 <Unit('kilojoules_per_mole')>,
 'type': 'TrivalentLonePair',
 'match': 'once',
 'distance': 0.3 <Unit('angstrom')>,
 'outOfPlaneAngle': None,
 'inPlaneAngle': None,
 'charge_increment1': 0.2 <Unit('elementary_charge')>,
 'charge_increment2': 0.0 <Unit('elementary_charge')>,
 'charge_increment3': 0.0 <Unit('elementary_charge')>,
 'charge_increment4': 0.0 <Unit('elementary_charge')>,
 'sigma': 1.0 <Unit('angstrom')>,
 'name': 'trivalent_nitrogen'}
run("N", name="trivalent_nitrogen")

Each of the molecules shown so far was selected to only have a single virtual site added, but larger molecules may have multiple copies of different types of virtual sites added. For example, this force field will give hexachlorobenzene 6 virtual sites, a dialdehyde two, or both types of virtual sites to something with a mixture of these chemistries. (Note that these parameters are made-up for instructive purposes, not the result of a force field fit, and likely to produce crashes or bad results for realistic ligands!)

viz("c1(Cl)c(Cl)c(Cl)c(Cl)c(Cl)c1Cl")
viz("O=CCCCCC=O")
viz("ClCCCC(Cl)(CC=O)CC=O")
run("O=C1c2ccc(Cl)cc2C(=O)N1[C@H]3CCC(=O)NC3=O", name="ligand")