Source code for openff.recharge.charges.bcc

"""Generate bond charge corrections for molecules from a collection of BCC parameters."""

import json
import os
from typing import TYPE_CHECKING, Any, Dict, List, Optional

import numpy
from openff.units import unit
from openff.utilities import get_data_file_path, requires_package
from openff.recharge._pydantic import BaseModel, Field, constr

from openff.recharge.aromaticity import AromaticityModel, AromaticityModels
from openff.recharge.charges.exceptions import ChargeAssignmentError
from openff.recharge.charges.qc import QCChargeGenerator, QCChargeSettings
from openff.recharge.conformers import ConformerGenerator, ConformerSettings
from openff.recharge.utilities.exceptions import (
    UnsupportedBCCSmirksError,
    UnsupportedBCCValueError,
)
from openff.recharge.utilities.toolkits import match_smirks

if TYPE_CHECKING:
    from openff.toolkit import Molecule
    from openff.toolkit.typing.engines.smirnoff.parameters import (
        ChargeIncrementModelHandler,
    )


[docs]class BCCParameter(BaseModel):
    """An object which encodes the value of a bond-charge correction, the chemical
    environment to which it should be applied, and provenance about its source.
    """

    smirks: constr(min_length=1) = Field(
        ...,
        description="A SMIRKS pattern that encodes the chemical environment that "
        "this correction should be applied to.",
    )
    value: float = Field(..., description="The value [e] of this correction.")

    provenance: Optional[Dict[str, Any]] = Field(
        None, description="Provenance information about this bond charge correction."
    )


[docs]class BCCCollection(BaseModel):
    """The settings which describes which BCCs should be applied,
    as well as information about how they should be applied."""

    parameters: List[BCCParameter] = Field(
        ..., description="The bond charge corrections to apply."
    )
    aromaticity_model: AromaticityModels = Field(
        AromaticityModels.AM1BCC,
        description="The model to use when assigning aromaticity.",
    )

[docs]    def to_smirnoff(self) -> "ChargeIncrementModelHandler":
        """Converts this collection of bond charge correction parameters to
        a SMIRNOFF bond charge increment parameter handler.

        Notes
        -----
        * The AM1BCC charges applied by this handler will likely not match
          those computed using the built-in OpenEye implementation as that
          implementation uses a custom aromaticity model not supported by
          SMIRNOFF. This is in addition to potential conversion errors of the
          parameters into the SMIRKS language.

        * The aromaticity model defined by the collection will be ignored as
          this handler will parse the aromaticity model directly from the
          top level aromaticity node of the the SMIRNOFF specification.

        Returns
        -------
            The constructed parameter handler.
        """
        from openff.toolkit.typing.engines.smirnoff.parameters import (
            ChargeIncrementModelHandler,
        )

        # noinspection PyTypeChecker
        bcc_parameter_handler = ChargeIncrementModelHandler(version="0.3")

        bcc_parameter_handler.number_of_conformers = 500
        bcc_parameter_handler.partial_charge_method = "am1elf10"

        for bcc_parameter in reversed(self.parameters):
            bcc_parameter_handler.add_parameter(
                {
                    "smirks": bcc_parameter.smirks,
                    "charge_increment": [
                        bcc_parameter.value * unit.elementary_charge,
                        -bcc_parameter.value * unit.elementary_charge,
                    ],
                }
            )

        return bcc_parameter_handler

[docs]    @classmethod
    @requires_package("openff.toolkit")
    def from_smirnoff(
        cls,
        parameter_handler: "ChargeIncrementModelHandler",
        aromaticity_model=AromaticityModels.MDL,
    ) -> "BCCCollection":
        """Attempts to convert a SMIRNOFF bond charge increment parameter handler
        to a bond charge parameter collection.

        Notes
        -----
        * Only bond charge corrections (i.e. corrections whose SMIRKS only involve
          two tagged atoms) are supported currently.

        Parameters
        ----------
        parameter_handler
            The parameter handler to convert.
        aromaticity_model
            The model which describes how aromaticity should be assigned
            when applying the bond charge correction parameters.

        Returns
        -------
            The converted bond charge correction collection.
        """
        bcc_parameters = []

        for off_parameter in reversed(parameter_handler.parameters):
            smirks = off_parameter.smirks

            if len(off_parameter.charge_increment) not in [1, 2]:
                raise UnsupportedBCCSmirksError(
                    smirks, len(off_parameter.charge_increment)
                )

            forward_value = off_parameter.charge_increment[0].m_as(
                unit.elementary_charge
            )
            reverse_value = -forward_value

            if len(off_parameter.charge_increment) > 1:
                reverse_value = off_parameter.charge_increment[1].m_as(
                    unit.elementary_charge
                )

            if not numpy.isclose(forward_value, -reverse_value):
                raise UnsupportedBCCValueError(
                    smirks,
                    forward_value,
                    reverse_value,
                )

            bcc_parameters.append(
                BCCParameter(smirks=smirks, value=forward_value, provenance={})
            )

        return cls(parameters=bcc_parameters, aromaticity_model=aromaticity_model)

[docs]    def vectorize(self, smirks: List[str]) -> numpy.ndarray:
        """Returns a flat vector of the charge increment values associated with each
        SMIRKS pattern in a specified list.

        Parameters
        ----------
        smirks
            A list of SMIRKS patterns corresponding to the BCC values to include
            in the returned vector.

        Returns
        -------
            A flat vector of charge increments with shape=(n_smirks, 1)
        """

        parameters = {parameter.smirks: parameter for parameter in self.parameters}
        return numpy.array([[parameters[pattern].value] for pattern in smirks])


[docs]class BCCGenerator:
    """A class for generating the bond charge corrections which should
    be applied to a molecule."""

    @classmethod
    def _validate_assignment_matrix(
        cls,
        molecule: "Molecule",
        assignment_matrix: numpy.ndarray,
        bcc_counts_matrix: numpy.ndarray,
        bcc_collection: BCCCollection,
    ):
        """Validates the assignment matrix.

        Parameters
        ----------
        molecule
            The molecule which the assignment matrix was generated
            for.
        assignment_matrix
            The assignment matrix to validate with
            shape=(n_atoms, n_bond_charge_corrections)
        bcc_counts_matrix
            A matrix which contains the number of times that a bond charge
            correction is applied to a given atom with
            shape=(n_atoms, n_bond_charge_corrections).
        bcc_collection
            The collection of parameters to be assigned.
        """

        # Check for unassigned atoms
        n_atoms = molecule.n_atoms

        unassigned_atoms = numpy.where(~bcc_counts_matrix[:n_atoms].any(axis=1))[0]

        if len(unassigned_atoms) > 0:
            unassigned_atom_string = ", ".join(map(str, unassigned_atoms))

            raise ChargeAssignmentError(
                f"Atoms {unassigned_atom_string} could not be assigned a bond "
                f"charge correction atom type."
            )

        # Check for non-zero contributions from charge corrections
        non_zero_assignments = assignment_matrix.sum(axis=0).nonzero()[0]

        if len(non_zero_assignments) > 0:
            correction_smirks = [
                bcc_collection.parameters[index].smirks
                for index in non_zero_assignments
            ]

            raise ChargeAssignmentError(
                f"An internal error occurred. The {correction_smirks} were applied in "
                f"such a way so that the bond charge corrections alter the total "
                f"charge of the molecule"
            )

        # Ensure all bonds have been assigned a BCC.
        n_assignments = bcc_counts_matrix.sum(axis=1)

        unassigned_atoms = {
            index: (len(atom.bonds), n_assignments[index])
            for index, atom in enumerate(molecule.atoms)
            if len(atom.bonds) != n_assignments[index]
        }

        if len(unassigned_atoms) > 0:
            unassigned_atom_string = "\n".join(
                [
                    f"atom {index}: expected={n_expected} assigned={n_actual}"
                    for index, (n_expected, n_actual) in unassigned_atoms.items()
                ]
            )

            raise ChargeAssignmentError(
                f"Bond charge corrections could not be applied to all bonds in the "
                f"molecule:\n\n{unassigned_atom_string}"
            )

[docs]    @classmethod
    def build_assignment_matrix(
        cls,
        molecule: "Molecule",
        bcc_collection: BCCCollection,
    ) -> numpy.ndarray:
        """Generates a matrix that specifies which bond charge corrections have been
        applied to which atoms in the molecule.

        The matrix takes the form `[atom_index, bcc_index]` where `atom_index` is the
        index of an atom in the molecule and `bcc_index` is the index of a bond charge
        correction. Each value in the matrix can either be positive or negative
        depending on the direction the BCC was applied in.

        Parameters
        ----------
        molecule
            The molecule to assign the bond charge corrections to.
        bcc_collection
            The bond charge correction parameters that may be assigned.

        Returns
        -------
            The assignment matrix with shape=(n_atoms, n_bond_charge_corrections)
            where `n_atoms` is the number of atoms in the molecule and
            `n_bond_charge_corrections` is the number of bond charges corrections
            to apply.
        """

        is_atom_aromatic, is_bond_aromatic = AromaticityModel.apply(
            molecule, bcc_collection.aromaticity_model
        )

        bond_charge_corrections = bcc_collection.parameters

        assignment_matrix = numpy.zeros(
            (molecule.n_atoms, len(bond_charge_corrections))
        )
        bcc_counts_matrix = numpy.zeros(
            (molecule.n_atoms, len(bond_charge_corrections))
        )

        matched_bonds = set()

        for index in range(len(bond_charge_corrections)):
            bond_correction = bond_charge_corrections[index]

            matches = match_smirks(
                bond_correction.smirks,
                molecule,
                is_atom_aromatic,
                is_bond_aromatic,
                False,
            )

            for matched_indices in matches:
                forward_matched_bond = (matched_indices[0], matched_indices[1])
                reverse_matched_bond = (matched_indices[1], matched_indices[0])

                if (
                    forward_matched_bond in matched_bonds
                    or reverse_matched_bond in matched_bonds
                ):
                    continue

                assignment_matrix[matched_indices[0], index] += 1
                assignment_matrix[matched_indices[1], index] -= 1

                bcc_counts_matrix[matched_indices[0], index] += 1
                bcc_counts_matrix[matched_indices[1], index] += 1

                matched_bonds.add(forward_matched_bond)
                matched_bonds.add(reverse_matched_bond)

        # Validate the assignments
        cls._validate_assignment_matrix(
            molecule, assignment_matrix, bcc_counts_matrix, bcc_collection
        )

        return assignment_matrix

[docs]    @classmethod
    def apply_assignment_matrix(
        cls,
        assignment_matrix: numpy.ndarray,
        bcc_collection: BCCCollection,
    ) -> numpy.ndarray:
        """Applies an assignment matrix to a list of bond charge corrections
        yield the final bond-charge corrections for a molecule.

        Parameters
        ----------
        assignment_matrix
            The bond-charge correction matrix constructed using
            ``build_assignment_matrix`` which describes how the
            bond charge corrections should be applied. This
            should have shape=(n_atoms, n_bond_charge_corrections)
        bcc_collection
            The bond charge correction parameters which may be assigned.

        Returns
        -------
            The bond-charge corrections with shape=(n_atoms, 1).
        """
        # Create a vector of the corrections to apply
        correction_values = numpy.array(
            [
                [bond_charge_correction.value]
                for bond_charge_correction in bcc_collection.parameters
            ]
        )

        # Apply the corrections
        charge_corrections = assignment_matrix @ correction_values

        if not numpy.isclose(charge_corrections.sum(), 0.0):
            raise ChargeAssignmentError(
                "An internal error occurred. The bond charge corrections were applied "
                "in such a way so that the total charge of the molecule will be "
                "altered."
            )

        return charge_corrections

[docs]    @classmethod
    def applied_corrections(
        cls,
        *molecules: "Molecule",
        bcc_collection: BCCCollection,
    ) -> List[BCCParameter]:
        """Returns the bond charge corrections which will be applied
        to a given molecule.

        Parameters
        ----------
        molecules
            The molecule which the bond charge corrections would be applied to.
        bcc_collection
            The bond charge correction parameters which may be assigned.
        """

        applied_corrections = []

        for molecule in molecules:
            assignment_matrix = cls.build_assignment_matrix(molecule, bcc_collection)
            applied_correction_indices = numpy.where(assignment_matrix.any(axis=0))[0]

            applied_corrections.extend(
                bcc_collection.parameters[index]
                for index in applied_correction_indices
                if bcc_collection.parameters[index] not in applied_corrections
            )

        applied_corrections.sort(key=lambda x: bcc_collection.parameters.index(x))
        return applied_corrections

[docs]    @classmethod
    def generate(
        cls,
        molecule: "Molecule",
        bcc_collection: BCCCollection,
    ) -> numpy.ndarray:
        """Generate a set of charge increments for a molecule.

        Parameters
        ----------
        molecule
            The molecule to generate the bond-charge corrections for.
        bcc_collection
            The bond charge correction parameters which may be assigned.

        Returns
        -------
            The bond-charge corrections which should be applied to the
            molecule.
        """

        assignment_matrix = cls.build_assignment_matrix(molecule, bcc_collection)

        generated_corrections = cls.apply_assignment_matrix(
            assignment_matrix, bcc_collection
        )

        return generated_corrections


[docs]def original_am1bcc_corrections() -> BCCCollection:
    """Returns the bond charge corrections originally reported
    in the literture [1]_.

    References
    ----------
    [1] Jakalian, A., Jack, D. B., & Bayly, C. I. (2002). Fast, efficient
        generation of high-quality atomic charges. AM1-BCC model: II.
        Parameterization and validation. Journal of computational chemistry,
        23(16), 1623–1641.
    """
    bcc_file_path = get_data_file_path(
        os.path.join("bcc", "original-am1-bcc.json"), "openff.recharge"
    )

    with open(bcc_file_path) as file:
        bcc_dictionaries = json.load(file)

    bond_charge_corrections = [
        BCCParameter(**dictionary) for dictionary in bcc_dictionaries
    ]

    return BCCCollection(
        parameters=bond_charge_corrections, aromaticity_model=AromaticityModels.AM1BCC
    )


[docs]def compare_openeye_parity(molecule: "Molecule") -> bool:
    """A utility function to compute the bond charge corrections
    on a molecule using both the internal AM1BCC implementation,
    and the OpenEye AM1BCC implementation.

    This method is mainly only to be used for testing and validation
    purposes.

    Parameters
    ----------
    molecule
        The molecule to compute the charges of.

    Returns
    -------
        Whether the internal and OpenEye implementations are in
        agreement for this molecule.
    """

    bond_charge_corrections = original_am1bcc_corrections()

    # Generate a conformer for the molecule.
    conformers = ConformerGenerator.generate(
        molecule,
        ConformerSettings(method="omega", sampling_mode="sparse", max_conformers=1),
    )

    # Generate a set of reference charges using the OpenEye implementation
    reference_charges = QCChargeGenerator.generate(
        molecule,
        conformers,
        QCChargeSettings(theory="am1bcc", symmetrize=False, optimize=False),
    )

    # Generate a set of charges using this frameworks functions
    am1_charges = QCChargeGenerator.generate(
        molecule,
        conformers,
        QCChargeSettings(theory="am1", symmetrize=False, optimize=False),
    )

    # Determine the values of the OE BCCs
    reference_charge_corrections = reference_charges - am1_charges

    # Compute the internal BCCs
    assignment_matrix = BCCGenerator.build_assignment_matrix(
        molecule, bond_charge_corrections
    )
    charge_corrections = BCCGenerator.apply_assignment_matrix(
        assignment_matrix, bond_charge_corrections
    )

    implementation_charges = am1_charges + charge_corrections

    # Check that there is no difference between the implemented and reference charges.
    return numpy.allclose(reference_charges, implementation_charges) and numpy.allclose(
        charge_corrections, reference_charge_corrections
    )