Source code for openff.toolkit.utils.ambertools_wrapper
"""
Wrapper class providing a minimal consistent interface to `AmberTools <http://ambermd.org/AmberTools.php>`_.
"""
__all__ = ("AmberToolsToolkitWrapper",)
import subprocess
import tempfile
from collections import defaultdict
from shutil import which
from typing import TYPE_CHECKING, Optional
import numpy as np
from openff.toolkit import Quantity, unit
from openff.toolkit.utils import base_wrapper, rdkit_wrapper
from openff.toolkit.utils.exceptions import (
AntechamberNotFoundError,
ChargeCalculationError,
ChargeMethodUnavailableError,
ToolkitUnavailableException,
)
if TYPE_CHECKING:
from openff.toolkit.topology.molecule import Molecule
[docs]class AmberToolsToolkitWrapper(base_wrapper.ToolkitWrapper):
"""
AmberTools toolkit wrapper
.. warning :: This API is experimental and subject to change.
"""
_toolkit_name = "AmberTools"
_toolkit_installation_instructions = (
"The AmberTools toolkit (free and open source) can be found at "
"https://anaconda.org/conda-forge/ambertools"
)
_supported_charge_methods = {
"am1bcc": {
"antechamber_keyword": "bcc",
"min_confs": 1,
"max_confs": 1,
"rec_confs": 1,
},
"am1-mulliken": {
"antechamber_keyword": "mul",
"min_confs": 1,
"max_confs": 1,
"rec_confs": 1,
},
"gasteiger": {
"antechamber_keyword": "gas",
"min_confs": 0,
"max_confs": 0,
"rec_confs": 0,
},
}
SUPPORTED_CHARGE_METHODS = _supported_charge_methods
[docs] def __init__(self):
from openff.utilities.provenance import get_ambertools_version
super().__init__()
if not self.is_available():
raise ToolkitUnavailableException(
f"The required toolkit {self._toolkit_name} is not "
f"available. {self._toolkit_installation_instructions}"
)
self._toolkit_version = get_ambertools_version()
# TODO: Find AMBERHOME or executable home, checking miniconda if needed
# Store an instance of an RDKitToolkitWrapper for file I/O
self._rdkit_toolkit_wrapper = rdkit_wrapper.RDKitToolkitWrapper()
[docs] @staticmethod
def is_available() -> bool:
"""
Check whether the AmberTools toolkit is installed
Returns
-------
is_installed
True if AmberTools is installed, False otherwise.
"""
# TODO: Check all tools needed
ANTECHAMBER_PATH = which("antechamber")
if ANTECHAMBER_PATH is None:
return False
# AmberToolsToolkitWrapper needs RDKit to do basically anything, since its interface requires SDF I/O
if not (rdkit_wrapper.RDKitToolkitWrapper.is_available()):
return False
return True
[docs] def assign_partial_charges(
self,
molecule: "Molecule",
partial_charge_method: Optional[str] = None,
use_conformers: Optional[list[Quantity]] = None,
strict_n_conformers: bool = False,
normalize_partial_charges: bool = True,
_cls=None,
):
"""
Compute partial charges with AmberTools using antechamber/sqm, and assign
the new values to the partial_charges attribute.
.. warning :: This API experimental and subject to change.
.. todo ::
* Do we want to also allow ESP/RESP charges?
Parameters
----------
molecule
Molecule for which partial charges are to be computed
partial_charge_method
The charge model to use. One of ['gasteiger', 'am1bcc', 'am1-mulliken'].
If None, 'am1-mulliken' will be used.
use_conformers
with shape (n_atoms, 3) and dimension of distance. Optional, default = None
List of unit-wrapped numpy arrays to use for partial charge calculation.
If None, an appropriate number of conformers will be generated.
strict_n_conformers
Whether to raise an exception if an invalid number of conformers is provided for
the given charge method.
If this is False and an invalid number of conformers is found, a warning will be raised.
normalize_partial_charges
Whether to offset partial charges so that they sum to the total formal charge of the molecule.
This is used to prevent accumulation of rounding errors when the partial charge generation method has
low precision.
_cls
Molecule constructor
Raises
------
ChargeMethodUnavailableError if the requested charge method can not be handled by this toolkit
ChargeCalculationError if the charge method is supported by this toolkit, but fails
"""
import os
import subprocess
from openff.toolkit.topology import Molecule
if partial_charge_method is None:
partial_charge_method = "am1-mulliken"
else:
# Standardize method name for string comparisons
partial_charge_method = partial_charge_method.lower()
if partial_charge_method not in self._supported_charge_methods:
raise ChargeMethodUnavailableError(
f"partial_charge_method '{partial_charge_method}' is not available from AmberToolsToolkitWrapper. "
f"Available charge methods are {self._supported_charge_methods}"
)
charge_method = self._supported_charge_methods[partial_charge_method]
if _cls is None:
_cls = Molecule
# Make a temporary copy of the molecule, since we'll be messing with its conformers
mol_copy = _cls(molecule)
if use_conformers is None:
if charge_method["rec_confs"] == 0:
mol_copy._conformers = None
else:
mol_copy.generate_conformers(
n_conformers=charge_method["rec_confs"],
rms_cutoff=0.25 * unit.angstrom,
toolkit_registry=rdkit_wrapper.RDKitToolkitWrapper(),
)
# TODO: What's a "best practice" RMS cutoff to use here?
else:
mol_copy._conformers = None
for conformer in use_conformers:
mol_copy._add_conformer(conformer)
self._check_n_conformers(
mol_copy,
partial_charge_method=partial_charge_method,
min_confs=charge_method["min_confs"],
max_confs=charge_method["max_confs"],
strict_n_conformers=strict_n_conformers,
)
ANTECHAMBER_PATH = which("antechamber")
if ANTECHAMBER_PATH is None:
raise AntechamberNotFoundError(
"Antechamber not found, cannot run assign_partial_charges()"
)
# Compute charges
with tempfile.TemporaryDirectory() as tmpdir:
net_charge = mol_copy.total_charge.m_as(unit.elementary_charge)
# Write out molecule in SDF format
# TODO: How should we handle multiple conformers?
self._rdkit_toolkit_wrapper.to_file(
mol_copy, f"{tmpdir}/molecule.sdf", file_format="sdf"
)
# Compute desired charges
# TODO: Add error handling if antechamber chokes
short_charge_method = charge_method["antechamber_keyword"]
subprocess.check_output(
[
"antechamber",
"-i",
"molecule.sdf",
"-fi",
"sdf",
"-o",
"charged.mol2",
"-fo",
"mol2",
"-pf",
"yes",
"-dr",
"n",
"-c",
str(short_charge_method),
"-nc",
str(net_charge),
],
cwd=tmpdir,
)
# Write out just charges
subprocess.check_output(
[
"antechamber",
"-dr",
"n",
"-i",
"charged.mol2",
"-fi",
"mol2",
"-o",
"charges2.mol2",
"-fo",
"mol2",
"-c",
"wc",
"-cf",
"charges.txt",
"-pf",
"yes",
],
cwd=tmpdir,
)
# Check to ensure charges were actually produced
if not os.path.exists(f"{tmpdir}/charges.txt"):
# TODO: copy files into local directory to aid debugging?
raise ChargeCalculationError(
"Antechamber/sqm partial charge calculation failed on "
f"molecule {molecule.name} (SMILES {molecule.to_smiles()})"
)
# Read the charges
with open(f"{tmpdir}/charges.txt") as infile:
contents = infile.read()
text_charges = contents.split()
charges = np.zeros([molecule.n_atoms], np.float64)
for index, token in enumerate(text_charges):
charges[index] = float(token)
# TODO: Ensure that the atoms in charged.mol2 are in the same order as in molecule.sdf
charges = Quantity(charges, unit.elementary_charge)
molecule.partial_charges = charges
if normalize_partial_charges:
molecule._normalize_partial_charges()
def _modify_sqm_in_to_request_bond_orders(self, file_path):
"""
Modify a sqm.in file produced by antechamber to include the "printbondorders=1" directive
in the header. This method will overwrite the original file.
Parameters
----------
file_path
The path to sqm.in
"""
data = open(file_path).read()
# Original sqm.in file headerlooks like:
# Run semi-empirical minimization
# &qmmm
# qm_theory='AM1', grms_tol=0.0005,
# scfconv=1.d-10, ndiis_attempts=700, qmcharge=0,
# /
# ... (atom coordinates in something like XYZ format) ...
# To get WBOs, we need to add "printbondorders=1" to the list of keywords
# First, split the sqm.in text at the "/" mark at the end of the header
datasp = data.split("/")
# Insert the "printbondorders" directive in a new line and re-add the "/"
datasp.insert(1, "printbondorders=1, \n /")
# Reassemble the file text
new_data = "".join(datasp)
# Write the new file contents, overwriting the original file.
with open(file_path, "w") as of:
of.write(new_data)
def _get_fractional_bond_orders_from_sqm_out(
self, file_path, validate_elements=None
):
"""
Process a SQM output file containing bond orders, and return a dict of the form
dict[atom_1_index, atom_2_index] = fractional_bond_order
Parameters
----------
file_path
File path for sqm output file
validate_elements
The element symbols expected in molecule index order. A ValueError will be raised
if the elements are not found in this order.
Returns
-------
bond_orders
A dictionary where the keys are tuples of two atom indices and the values are
floating-point bond orders. The keys are sorted in ascending order, such that
the lower atom index is key[0] and the higher is key[1].
"""
# Example sqm.out section with WBOs:
# Bond Orders
#
# QMMM: NUM1 ELEM1 NUM2 ELEM2 BOND_ORDER
# QMMM: 2 C 1 C 1.41107532
# QMMM: 3 C 1 C 1.41047804
# ...
# QMMM: 15 H 13 H 0.00000954
# QMMM: 15 H 14 H 0.00000813
#
# --------- Calculation Completed ----------
data = open(file_path).read()
begin_sep = """ Bond Orders
QMMM: NUM1 ELEM1 NUM2 ELEM2 BOND_ORDER
""" # noqa
end_sep = """
--------- Calculation Completed ----------
"""
# Extract the chunk of text between begin_sep and end_sep, and split it by newline
fbo_lines = data.split(begin_sep)[1].split(end_sep)[0].split("\n")
# Iterate over the lines and populate the dict to return
bond_orders = dict()
for line in fbo_lines:
linesp = line.split()
atom_index_1 = int(linesp[1])
atom_element_1 = linesp[2]
atom_index_2 = int(linesp[3])
atom_element_2 = linesp[4]
bond_order = float(linesp[5])
# If validate_elements was provided, ensure that the ordering of element symbols is what we expected
if validate_elements is not None:
if (atom_element_1 != validate_elements[atom_index_1 - 1]) or (
atom_element_2 != validate_elements[atom_index_2 - 1]
):
# raise ValueError('\n'.join(fbo_lines))
raise ValueError(
f"Elements or indexing in sqm output differ from expectation. "
f"Expected {validate_elements[atom_index_1]} with index {atom_index_1} and "
f"{validate_elements[atom_index_2]} with index {atom_index_2}, "
f"but SQM output has {atom_element_1} and {atom_element_2} for the same atoms."
)
# To make lookup easier, we identify bonds as integer tuples with the lowest atom index
# first and the highest second.
index_tuple = tuple(sorted([atom_index_1, atom_index_2]))
bond_orders[index_tuple] = bond_order
return bond_orders
[docs] def assign_fractional_bond_orders(
self,
molecule: "Molecule",
bond_order_model: Optional[str] = None,
use_conformers: Optional[list[str]] = None,
_cls=None,
):
"""
Update and store list of bond orders this molecule. Bond orders are stored on each
bond, in the `bond.fractional_bond_order` attribute.
.. warning :: This API is experimental and subject to change.
Parameters
----------
molecule
The molecule to assign wiberg bond orders to
bond_order_model
The charge model to use. Only allowed value is 'am1-wiberg'. If None, 'am1-wiberg' will be used.
use_conformers
The conformers to use for fractional bond order calculation. If None, an appropriate
number of conformers will be generated by an available ToolkitWrapper.
_cls
Molecule constructor
"""
from openff.toolkit.topology import Molecule
ANTECHAMBER_PATH = which("antechamber")
if ANTECHAMBER_PATH is None:
raise AntechamberNotFoundError(
"Antechamber not found, cannot run "
"AmberToolsToolkitWrapper.assign_fractional_bond_orders()"
)
if _cls is None:
_cls = Molecule
# Make a copy since we'll be messing with this molecule's conformers
temp_mol = _cls(molecule)
if use_conformers is None:
temp_mol.generate_conformers(
n_conformers=1,
toolkit_registry=self._rdkit_toolkit_wrapper,
)
else:
temp_mol._conformers = None
for conformer in use_conformers:
temp_mol._add_conformer(conformer)
if len(temp_mol.conformers) == 0:
raise ValueError(
"No conformers present in molecule submitted for fractional bond order calculation. Consider "
"loading the molecule from a file with geometry already present or running "
"molecule.generate_conformers() before calling molecule.assign_fractional_bond_orders"
)
# Compute bond orders
bond_order_model_to_antechamber_keyword = {"am1-wiberg": "mul"}
supported_bond_order_models = list(
bond_order_model_to_antechamber_keyword.keys()
)
if bond_order_model is None:
bond_order_model = "am1-wiberg"
bond_order_model = bond_order_model.lower()
if bond_order_model not in supported_bond_order_models:
raise ValueError(
f"Bond order model '{bond_order_model}' is not supported by AmberToolsToolkitWrapper. "
f"Supported models are {supported_bond_order_models}"
)
ac_charge_keyword = bond_order_model_to_antechamber_keyword[bond_order_model]
bond_orders = defaultdict(list)
for conformer in [*temp_mol.conformers]:
with tempfile.TemporaryDirectory() as tmpdir:
net_charge = temp_mol.total_charge
# Write out molecule in SDF format
temp_mol._conformers = [conformer]
self._rdkit_toolkit_wrapper.to_file(
temp_mol, f"{tmpdir}/molecule.sdf", file_format="sdf"
)
# Prepare sqm.in file as if we were going to run charge calc
# TODO: Add error handling if antechamber chokes
subprocess.check_output(
[
"antechamber",
"-i",
"molecule.sdf",
"-fi",
"sdf",
"-o",
"sqm.in",
"-fo",
"sqmcrt",
"-pf",
"yes",
"-c",
ac_charge_keyword,
"-nc",
str(net_charge),
],
cwd=tmpdir,
)
# Modify sqm.in to request bond order calculation
self._modify_sqm_in_to_request_bond_orders(f"{tmpdir}/sqm.in")
# Run sqm to get bond orders
subprocess.check_output(
["sqm", "-i", "sqm.in", "-o", "sqm.out", "-O"], cwd=tmpdir
)
# Ensure that antechamber/sqm did not change the indexing by checking against
# an ordered list of element symbols for this molecule
expected_elements = [atom.symbol for atom in molecule.atoms]
conformer_bond_orders = self._get_fractional_bond_orders_from_sqm_out(
f"{tmpdir}/sqm.out", validate_elements=expected_elements
)
for bond_indices, value in conformer_bond_orders.items():
bond_orders[bond_indices].append(value)
# Note that sqm calculate WBOs for ALL PAIRS of atoms, not just those that have
# bonds defined in the original molecule. So here we iterate over the bonds in
# the original molecule and only nab the WBOs for those.
for bond in molecule.bonds:
# The atom index tuples that act as bond indices are ordered from lowest to highest by
# _get_fractional_bond_orders_from_sqm_out, so here we make sure that we look them up in
# sorted order as well
sorted_atom_indices = sorted(
tuple([bond.atom1_index + 1, bond.atom2_index + 1])
)
bond.fractional_bond_order = np.mean(
bond_orders[tuple(sorted_atom_indices)]
)