Source code for openff.toolkit.utils.openeye_wrapper

"""
Wrapper class providing a minimal consistent interface to
the `OpenEye Toolkit <https://docs.eyesopen.com/toolkits/python/quickstart-python/index.html>`_
"""

__all__ = ("OpenEyeToolkitWrapper",)


import importlib
import logging
import pathlib
import re
import tempfile
from collections import defaultdict
from functools import wraps
from typing import TYPE_CHECKING, Any, Optional, Union

import numpy as np
from cachetools import LRUCache, cached
from typing_extensions import TypeAlias

from openff.toolkit import Quantity, unit

if TYPE_CHECKING:
    from openff.toolkit.topology.molecule import FrozenMolecule, Molecule, Bond, Atom

from openff.units.elements import SYMBOLS

from openff.toolkit.utils.base_wrapper import (
    ToolkitWrapper,
    _ChargeSettings,
    _mol_to_ctab_and_aro_key,
)
from openff.toolkit.utils.constants import (
    ALLOWED_AROMATICITY_MODELS,
    DEFAULT_AROMATICITY_MODEL,
)
from openff.toolkit.utils.exceptions import (
    ChargeCalculationError,
    ChargeMethodUnavailableError,
    ConformerGenerationError,
    GAFFAtomTypeWarning,
    InChIParseError,
    InconsistentStereochemistryError,
    InvalidAromaticityModelError,
    InvalidIUPACNameError,
    LicenseError,
    NotAttachedToMoleculeError,
    OpenEyeError,
    OpenEyeImportError,
    RadicalsNotSupportedError,
    SMILESParseError,
    ToolkitUnavailableException,
    UnassignedChemistryInPDBError,
    UndefinedStereochemistryError,
)
from openff.toolkit.utils.utils import inherit_docstrings

logger = logging.getLogger(__name__)


TTA: TypeAlias = tuple[tuple[Any, ...], ...]


def get_oeformat(file_format: str) -> str:
    from openeye import oechem

    file_format = file_format.upper()
    # XXX This is what RDKit does. Should be supported here too?
    if file_format == "MOL":
        file_format = "SDF"

    oeformat = getattr(oechem, "OEFormat_" + file_format, None)
    if oeformat is None:
        raise ValueError(f"Unsupported file format: {file_format}")
    return oeformat


[docs]@inherit_docstrings
class OpenEyeToolkitWrapper(ToolkitWrapper):
    """
    OpenEye toolkit wrapper

    .. warning :: This API is experimental and subject to change.
    """

    _toolkit_name = "OpenEye Toolkit"
    _toolkit_installation_instructions = (
        "The OpenEye Toolkits can be installed via "
        "`mamba install openeye-toolkits -c openeye`"
    )
    _toolkit_license_instructions = (
        "The OpenEye Toolkits require a (free for academics) license, see "
        "https://docs.eyesopen.com/toolkits/python/quickstart-python/license.html"
    )
    # This could belong to ToolkitWrapper, although it seems strange
    # to carry that data for open-source toolkits
    _is_licensed: Optional[bool] = None
    # Only for OpenEye is there potentially a difference between
    # being available and installed
    _is_installed: Optional[bool] = None
    _license_functions: dict[str, str] = {
        "oechem": "OEChemIsLicensed",
        "oequacpac": "OEQuacPacIsLicensed",
        "oeiupac": "OEIUPACIsLicensed",
        "oeomega": "OEOmegaIsLicensed",
    }
    _supported_charge_methods = {
        "am1bcc": {
            "oe_charge_method": "OEAM1BCCCharges",
            "min_confs": 1,
            "max_confs": 1,
            "rec_confs": 1,
        },
        "am1-mulliken": {
            "oe_charge_method": "OEAM1Charges",
            "min_confs": 1,
            "max_confs": 1,
            "rec_confs": 1,
        },
        "gasteiger": {
            "oe_charge_method": "OEGasteigerCharges",
            "min_confs": 0,
            "max_confs": 0,
            "rec_confs": 0,
        },
        "mmff94": {
            "oe_charge_method": "OEMMFF94Charges",
            "min_confs": 0,
            "max_confs": 0,
            "rec_confs": 0,
        },
        "am1bccnosymspt": {
            "oe_charge_method": "OEAM1BCCCharges",
            "min_confs": 1,
            "max_confs": 1,
            "rec_confs": 1,
        },
        "am1elf10": {
            "oe_charge_method": "OEELFCharges",
            "min_confs": 1,
            "max_confs": None,  # type: ignore[typeddict-item]
            "rec_confs": 500,
        },
        "am1bccelf10": {
            "oe_charge_method": "OEAM1BCCELF10Charges",
            "min_confs": 1,
            "max_confs": None,  # type: ignore[typeddict-item]
            "rec_confs": 500,
        },
    }

    SUPPORTED_CHARGE_METHODS = _supported_charge_methods

    _toolkit_file_read_formats = [
        "CAN",
        "CDX",
        "CSV",
        "FASTA",
        "INCHI",
        "INCHIKEY",
        "ISM",
        "MDL",
        "MF",
        "MMOD",
        "MOL2",
        "MOL2H",
        "MOPAC",
        "OEB",
        "PDB",
        "RDF",
        "SDF",
        "SKC",
        "SLN",
        "SMI",
        "USM",
        "XYC",
    ]
    _toolkit_file_write_formats = [
        "CAN",
        "CDX",
        "CSV",
        "FASTA",
        "INCHI",
        "INCHIKEY",
        "ISM",
        "MDL",
        "MF",
        "MMOD",
        "MOL2",
        "MOL2H",
        "MOPAC",
        "OEB",
        "PDB",
        "RDF",
        "SDF",
        "SKC",
        "SLN",
        "SMI",
        "USM",
        "XYC",
    ]

[docs]    def __init__(self):
        # check if the toolkit can be loaded
        if not self.is_available():
            if self._is_installed is False:
                raise ToolkitUnavailableException(
                    "OpenEye Toolkits are not installed."
                    + self._toolkit_installation_instructions
                )
            if self._is_licensed is False:
                raise LicenseError(
                    "The OpenEye Toolkits are found to be installed but not licensed and "
                    + "therefore will not be used.\n"
                    + self._toolkit_license_instructions
                )

        from openeye import __version__ as openeye_version

        self._toolkit_version = openeye_version

    @classmethod
    def _check_licenses(cls) -> bool:
        """Check license of all known OpenEye tools. Returns True if any are found
        to be licensed, False if all are not."""
        for tool, license_func in cls._license_functions.items():
            try:
                module = importlib.import_module("openeye." + tool)
            except ImportError:
                continue
            else:
                if getattr(module, license_func)():
                    return True
        return False

[docs]    @classmethod
    def is_available(cls) -> bool:
        """
        Check if the given OpenEye toolkit components are available.

        If the OpenEye toolkit is not installed or no license is found
        for at least one the required toolkits , ``False`` is returned.

        Returns
        -------
        all_installed
            ``True`` if all required OpenEye tools are installed and licensed,
            ``False`` otherwise

        """
        if cls._is_available is None:
            if cls._is_licensed is None:
                cls._is_licensed = cls._check_licenses()
            if cls._is_installed is None:
                for tool in cls._license_functions.keys():
                    cls._is_installed = True
                    try:
                        importlib.import_module("openeye." + tool)
                    except ImportError:
                        cls._is_installed = False
            if cls._is_installed:
                if cls._is_licensed:
                    cls._is_available = True
                else:
                    cls._is_available = False
            cls._is_available = cls._is_installed and cls._is_licensed
        return cls._is_available  # type: ignore[return-value]

[docs]    def from_object(
        self,
        obj,
        allow_undefined_stereo: bool = False,
        _cls=None,
    ) -> "FrozenMolecule":
        """
        Convert an OEMol (or OEMol-derived object) into an openff.toolkit.topology.molecule

        Parameters
        ----------
        obj
            An object to by type-checked.
        allow_undefined_stereo
            Whether to accept molecules with undefined stereocenters. If False,
            an exception will be raised if a molecule with undefined stereochemistry
            is passed into this function.
        _cls
            Molecule constructor

        Returns
        -------
        Molecule
            An openff.toolkit.topology.molecule Molecule.

        Raises
        ------
        NotImplementedError
            If the object could not be converted into a Molecule.
        """
        # TODO: Add tests for the from_object functions
        from openeye import oechem

        if _cls is None:
            from openff.toolkit.topology.molecule import Molecule

            _cls = Molecule

        if isinstance(obj, oechem.OEMolBase):
            return self.from_openeye(
                oemol=obj,
                allow_undefined_stereo=allow_undefined_stereo,
                _cls=_cls,
            )
        raise NotImplementedError(
            "Cannot create Molecule from {} object".format(type(obj))
        )

    def _polymer_openmm_topology_to_offmol(
        self, mol_class, omm_top, substructure_dictionary
    ):
        oemol = self._polymer_openmm_topology_to_oemol(omm_top, substructure_dictionary)
        offmol = mol_class.from_openeye(oemol, allow_undefined_stereo=True)
        return offmol

    def _polymer_openmm_topology_to_oemol(
        self,
        omm_top,
        substructure_library,
    ):
        """
        Parameters
        ----------
        omm_top
            OpenMM Topology loaded from PDB
        substructure_library
            A dictionary of substructures. substructure_library[aa_name] = list[tagged SMARTS, list[atom_names]]

        Returns
        -------
        oemol
            a new molecule with charges and bond order added
        """
        from openeye import oechem

        oemol = self._get_connectivity_from_openmm_top(omm_top)

        already_assigned_nodes = set()
        already_assigned_edges = set()

        # Keeping track of which atoms are matched where will help us with error
        # messages
        matches = defaultdict(list)

        for res_name in substructure_library:
            # TODO: This is a hack for the moment since we don't have a more sophisticated way to resolve clashes
            # so it just does the biggest substructures first
            sorted_substructure_smarts = sorted(
                substructure_library[res_name], key=len, reverse=True
            )
            for substructure_smarts in sorted_substructure_smarts:
                ss = self._fuzzy_query(substructure_smarts)
                for match in ss.Match(oemol, True):
                    match_mol_atom_indices = [
                        at.GetIdx() for at in match.GetTargetAtoms()
                    ]
                    for i in match_mol_atom_indices:
                        matches[i].append(res_name)

                    if any(
                        m in already_assigned_nodes for m in match_mol_atom_indices
                    ) and (res_name not in ["PEPTIDE_BOND", "DISULFIDE"]):
                        continue

                    for substructure_atom, mol_atom in zip(
                        match.GetPatternAtoms(), match.GetTargetAtoms()
                    ):
                        mol_atom.SetFormalCharge(substructure_atom.GetFormalCharge())
                        # Set arbitrary initial stereochemistry to avoid
                        # spamming "undefined stereo" warnings. In the from_polymer_pdb
                        # code path, the "real stereo" will be assigned later by a
                        # call to _assign_aromaticity_and_stereo_from_3d.
                        neighs = [n for n in mol_atom.GetAtoms()]
                        mol_atom.SetStereo(
                            neighs, oechem.OEAtomStereo_Tetra, oechem.OEAtomStereo_Left
                        )

                        already_assigned_nodes.add(mol_atom.GetIdx())
                    for substructure_bond, mol_bond in zip(
                        match.GetPatternBonds(), match.GetTargetBonds()
                    ):
                        mol_bond.SetOrder(substructure_bond.GetOrder())
                        already_assigned_edges.add(
                            tuple(sorted([mol_bond.GetBgnIdx(), mol_bond.GetEndIdx()]))
                        )

        oemol_n_atoms = len([*oemol.GetAtoms()])
        unassigned_atoms = sorted(set(range(oemol_n_atoms)) - already_assigned_nodes)

        all_bonds = set(
            [
                tuple(sorted([bond.GetBgnIdx(), bond.GetEndIdx()]))
                for bond in oemol.GetBonds()
            ]
        )
        unassigned_bonds = sorted(all_bonds - already_assigned_edges)

        if unassigned_atoms or unassigned_bonds:
            # Some advanced error reporting needs to interpret the substructure smarts to do things like
            # compare atom counts. Since OFFTK doesn't have a native class to hold fragments, we convert
            # the smarts into a sorted list of symbols to help with generating the error message.
            resname_to_symbols_and_atomnames: dict[str, list[tuple]] = {}
            for resname, smarts_to_atom_names in substructure_library.items():
                resname_to_symbols_and_atomnames[resname] = list()
                for smarts, atom_names in smarts_to_atom_names.items():
                    qmol = oechem.OEQMol()
                    oechem.OEParseSmiles(qmol, smarts)
                    symbols = sorted(
                        [SYMBOLS[atom.GetAtomicNum()] for atom in qmol.GetAtoms()]
                    )
                    resname_to_symbols_and_atomnames[resname].append(
                        (symbols, atom_names)
                    )

            raise UnassignedChemistryInPDBError(
                substructure_library=resname_to_symbols_and_atomnames,
                omm_top=omm_top,
                unassigned_atoms=unassigned_atoms,
                unassigned_bonds=unassigned_bonds,
                matches=matches,
            )

        return oemol

    def _get_connectivity_from_openmm_top(self, omm_top):
        from openeye import oechem

        oemol = oechem.OEMol()
        # Add atoms
        oemol_atoms = list()  # list of corresponding oemol atoms
        for atom in omm_top.atoms():
            oeatom = oemol.NewAtom(atom.element.atomic_number)
            oemol_atoms.append(oeatom)

        # Add bonds
        oemol_bonds = list()  # list of corresponding oemol bonds
        for bond in omm_top.bonds():
            atom1_index = bond[0].index
            atom2_index = bond[1].index
            oebond = oemol.NewBond(oemol_atoms[atom1_index], oemol_atoms[atom2_index])
            oemol_bonds.append(oebond)
        return oemol

    @staticmethod
    def _fuzzy_query(query):
        """return a copy of Query which is less specific:
        - ignore aromaticity and hybridization of atoms (i.e. [#6] not C)
        - ignore bond orders
        - ignore formal charges
        """
        from openeye import oechem

        from openff.toolkit.utils.exceptions import SMIRKSParsingError

        #  Jeff wasn't able to get this working with OEQMol and OEParseSmarts,
        #  the QMol/SS matching didn't behave correctly when set to AtomicNumber
        qmol = oechem.OEMol()
        status = oechem.OEParseSmiles(
            qmol,
            query,
        )

        if not status:
            raise SMIRKSParsingError(
                f"OpenEye Toolkit was unable to parse SMIRKS {query}"
            )
        ss = oechem.OESubSearch(qmol, oechem.OEExprOpts_AtomicNumber, 0)
        return ss

[docs]    def from_file(
        self,
        file_path: Union[str, pathlib.Path],
        file_format: str,
        allow_undefined_stereo: bool = False,
        _cls=None,
    ) -> list["FrozenMolecule"]:
        """
        Return an openff.toolkit.topology.Molecule from a file using this toolkit.

        Parameters
        ----------
        file_path
            The file to read the molecule from
        file_format
            Format specifier, usually file suffix (eg. 'MOL2', 'SMI')
            Note that not all toolkits support all formats. Check ToolkitWrapper.toolkit_file_read_formats for details.
        allow_undefined_stereo
            If false, raises an exception if oemol contains undefined stereochemistry.
        _cls
            Molecule constructor

        Returns
        -------
        molecules
            The list of ``Molecule`` objects in the file.

        Raises
        ------
        GAFFAtomTypeWarning
            If the loaded mol2 file possibly uses GAFF atom types, which
            are not supported.

        Examples
        --------

        Load a mol2 file into an OpenFF ``Molecule`` object.

        >>> from openff.toolkit.utils import get_data_file_path
        >>> mol2_file_path = get_data_file_path('molecules/cyclohexane.mol2')
        >>> toolkit = OpenEyeToolkitWrapper()
        >>> molecule = toolkit.from_file(mol2_file_path, file_format='mol2')

        """
        from openeye import oechem

        if isinstance(file_path, pathlib.Path):
            _file_path = file_path.as_posix()
        else:
            _file_path = file_path

        oeformat = get_oeformat(file_format)
        ifs = oechem.oemolistream(_file_path)
        if not ifs.IsValid():
            # Get Python to report an error message, if possible.
            # This can distinguish between FileNotFound, IsADirectoryError, etc.
            open(_file_path).close()
            # If that worked, then who knows. Fail anyway.
            raise OSError("Unable to open file")

        ifs.SetFormat(oeformat)

        return self._read_oemolistream_molecules(
            ifs, allow_undefined_stereo, file_path=_file_path, _cls=_cls
        )

[docs]    def from_file_obj(
        self,
        file_obj,
        file_format: str,
        allow_undefined_stereo: bool = False,
        _cls=None,
    ) -> list["Molecule"]:
        """
        Return an openff.toolkit.topology.Molecule from a file-like object (an object with a ".read()" method using
        this toolkit.

        Parameters
        ----------
        file_obj
            The file-like object to read the molecule from
        file_format
            Format specifier, usually file suffix (eg. 'MOL2', 'SMI')
            Note that not all toolkits support all formats. Check ToolkitWrapper.toolkit_file_read_formats for details.
        allow_undefined_stereo
            If false, raises an exception if oemol contains undefined stereochemistry.
        _cls
            Molecule constructor

        Returns
        -------
        molecules
            The list of Molecule objects in the file object.

        Raises
        ------
        GAFFAtomTypeWarning
            If the loaded mol2 file possibly uses GAFF atom types, which
            are not supported.

        """
        from openeye import oechem

        # Configure input molecule stream.
        ifs = oechem.oemolistream()
        ifs.openstring(file_obj.read())
        oeformat = get_oeformat(file_format)
        ifs.SetFormat(oeformat)

        return self._read_oemolistream_molecules(ifs, allow_undefined_stereo, _cls=_cls)

[docs]    def to_file_obj(self, molecule: "Molecule", file_obj, file_format: str):
        """
        Writes an OpenFF Molecule to a file-like object

        Parameters
        ----------
        molecule
            The molecule to write
        file_obj
            The file-like object to write to
        file_format
            The format for writing the molecule data

        """
        # This function requires a text-mode file_obj.
        try:
            file_obj.write("")
        except TypeError:
            # Switch to a ValueError and use a more informative exception
            # message to match RDKit.
            raise ValueError(
                "Need a text mode file object like StringIO or a file opened with mode 't'"
            ) from None

        with tempfile.TemporaryDirectory() as tmpdir:
            path = pathlib.Path(tmpdir, f"input.{file_format.lower()}")
            self.to_file(molecule, str(path), file_format)
            file_data = path.read_text()
            file_obj.write(file_data)

[docs]    def to_file(
        self,
        molecule: "Molecule",
        file_path: Union[str, pathlib.Path],
        file_format: str,
    ):
        """
        Writes an OpenFF Molecule to a file-like object

        Parameters
        ----------
        molecule
            The molecule to write
        file_path
            The file path to write to.
        file_format
            The format for writing the molecule data

        """
        from openeye import oechem

        if isinstance(file_path, pathlib.Path):
            file_path = file_path.as_posix()

        oemol = self.to_openeye(molecule)
        ofs = oechem.oemolostream(file_path)
        if not ofs.IsValid():
            # Get Python to report an error message, if possible.
            # This can distinguish between PermissionError, IsADirectoryError, etc.
            open(file_path, "wb").close()
            # If that worked, then who knows. Fail anyway.
            raise OSError("Unable to open file")

        openeye_format = get_oeformat(file_format)
        ofs.SetFormat(openeye_format)

        if openeye_format == oechem.OEFormat_SMI:
            ofs.SetFlavor(
                openeye_format,
                self._get_smiles_flavor(isomeric=True, explicit_hydrogens=True),
            )

        # OFFTK strictly treats SDF as a single-conformer format.
        # We need to override OETK's behavior here if the user is saving a multiconformer molecule.

        # Remove all but the first conformer when writing to SDF as we only support single conformer format
        if (file_format.lower() == "sdf") and oemol.NumConfs() > 1:
            conf1 = [conf for conf in oemol.GetConfs()][0]
            flat_coords = list()
            for coord in conf1.GetCoords().values():
                flat_coords.extend(coord)
            oemol.DeleteConfs()
            oecoords = oechem.OEFloatArray(flat_coords)
            oemol.NewConf(oecoords)
        # We're standardizing on putting partial charges into SDFs under the `atom.dprop.PartialCharge` property
        if (file_format.lower() == "sdf") and (molecule.partial_charges is not None):
            partial_charges_list = [
                oeatom.GetPartialCharge() for oeatom in oemol.GetAtoms()
            ]
            partial_charges_str = " ".join([f"{val:f}" for val in partial_charges_list])
            # TODO: "dprop" means "double precision" -- Is there any way to make Python more accurately
            #  describe/infer the proper data type?
            oechem.OESetSDData(oemol, "atom.dprop.PartialCharge", partial_charges_str)

        # If the file format is "pdb" using OEWriteMolecule() rearranges the atoms (hydrogens are pushed to the bottom)
        # Issue #475 (https://github.com/openforcefield/openff-toolkit/issues/475)
        # dfhahn's workaround: Using OEWritePDBFile does not alter the atom arrangement
        if file_format.lower() == "pdb":
            if oemol.NumConfs() > 1:
                for conf in oemol.GetConfs():
                    oechem.OEWritePDBFile(ofs, conf, oechem.OEOFlavor_PDB_BONDS)
            else:
                oechem.OEWritePDBFile(ofs, oemol, oechem.OEOFlavor_PDB_BONDS)
        else:
            oechem.OEWriteMolecule(ofs, oemol)
        ofs.close()

    @staticmethod
    def _turn_oemolbase_sd_charges_into_partial_charges(oemol) -> bool:
        """
        Process an OEMolBase object and check to see whether it has an SD data pair
        where the tag is "atom.dprop.PartialCharge", indicating that it has a list of
        atomic partial charges. If so, apply those charges to the OEAtoms in the OEMolBase,
        and delete the SD data pair.

        Parameters
        ----------
        oemol
            The molecule to process

        Returns
        -------
        charges_are_present
            Whether charges are present in the SD file. This is necessary because OEAtoms
            have a default partial charge of 0.0, which makes truly zero-charge molecules
            (eg "N2", "Ar"...) indistinguishable from molecules for which partial charges
            have not been assigned. The OFF Toolkit allows this distinction with
            mol.partial_charges=None. In order to complete roundtrips within the OFFMol
            spec, we must interpret the presence or absence of this tag as a proxy for
            mol.partial_charges=None.
        """
        from openeye import oechem

        for dp in oechem.OEGetSDDataPairs(oemol):
            if dp.GetTag() == "atom.dprop.PartialCharge":
                charges_str = oechem.OEGetSDData(oemol, "atom.dprop.PartialCharge")
                charges_unitless = [float(i) for i in charges_str.split()]
                assert len(charges_unitless) == oemol.NumAtoms()
                for charge, oeatom in zip(charges_unitless, oemol.GetAtoms()):
                    oeatom.SetPartialCharge(charge)
                oechem.OEDeleteSDData(oemol, "atom.dprop.PartialCharge")
                return True
        return False

    def _read_oemolistream_molecules(
        self,
        oemolistream,
        allow_undefined_stereo: bool,
        file_path: Optional[str] = None,
        _cls=None,
    ):
        """
        Reads and return the Molecules in a OEMol input stream.

        Parameters
        ----------
        oemolistream
            The OEMol input stream to read from.
        allow_undefined_stereo
            If false, raises an exception if oemol contains undefined stereochemistry.
        file_path
            The path to the mol2 file. This is used exclusively to make
            the error message more meaningful when the mol2 files doesn't
            use Tripos atom types.
        _cls
            Molecule constructor

        Returns
        -------
        molecules
            The list of Molecule objects in the stream.

        """
        from openeye import oechem

        mols = list()
        oemol = oechem.OEMol()
        while oechem.OEReadMolecule(oemolistream, oemol):
            oechem.OEPerceiveChiral(oemol)
            oechem.OEAssignAromaticFlags(oemol, oechem.OEAroModel_MDL)
            oechem.OE3DToInternalStereo(oemol)

            # If this is either a multi-conformer or multi-molecule SD file, check to see if there are partial charges
            if (oemolistream.GetFormat() == oechem.OEFormat_SDF) and hasattr(
                oemol, "GetConfs"
            ):
                # The openFF toolkit treats each conformer in a "multiconformer" SDF as
                # a separate molecule.
                # https://github.com/openforcefield/openff-toolkit/issues/202
                # Note that there is ambiguity about how SD data and "multiconformer" SD files should be stored.
                # As a result, we have to do some weird stuff below, as discussed in
                # https://docs.eyesopen.com/toolkits/python/oechemtk/oemol.html#dude-where-s-my-sd-data

                # Jeff: I was unable to find a way to distinguish whether a SDF was multiconformer or not.
                # The logic below should handle either single- or multi-conformer SDFs.
                for conf in oemol.GetConfIter():
                    # First, we turn "conf" into an OEMCMol (OE multiconformer mol), since OTHER file formats
                    # really are multiconformer, and we will eventually feed this into the `from_openeye` function,
                    # which is made to ingest multiconformer mols.
                    this_conf_oemcmol = conf.GetMCMol()

                    # Then, we take any SD data pairs that were on the oemol, and copy them on to "this_conf_oemcmol".
                    # These SD pairs will be populated if we're dealing with a single-conformer SDF.
                    for dp in oechem.OEGetSDDataPairs(oemol):
                        oechem.OESetSDData(
                            this_conf_oemcmol, dp.GetTag(), dp.GetValue()
                        )
                    # On the other hand, these SD pairs will be populated if we're dealing with a MULTI-conformer SDF.
                    for dp in oechem.OEGetSDDataPairs(conf):
                        oechem.OESetSDData(
                            this_conf_oemcmol, dp.GetTag(), dp.GetValue()
                        )
                    # This function fishes out the special SD data tag we use for partial charge
                    # ("atom.dprop.PartialCharge"), and applies those as OETK-supported partial charges on the OEAtoms
                    has_charges = self._turn_oemolbase_sd_charges_into_partial_charges(
                        this_conf_oemcmol
                    )

                    # Finally, we feed the molecule into `from_openeye`, where it converted into an OFFMol
                    mol = self.from_openeye(
                        this_conf_oemcmol,
                        allow_undefined_stereo=allow_undefined_stereo,
                        _cls=_cls,
                    )

                    # If the molecule didn't even have the `PartialCharges` tag, we set it from zeroes to None here.
                    if not (has_charges):
                        mol.partial_charges = None
                    mols.append(mol)

            else:
                # In case this is being read from a SINGLE-molecule SD file, convert the SD field where we
                # stash partial charges into actual per-atom partial charges
                self._turn_oemolbase_sd_charges_into_partial_charges(oemol)
                mol = self.from_openeye(
                    oemol, allow_undefined_stereo=allow_undefined_stereo, _cls=_cls
                )
                mols.append(mol)

            # Check if this is an AMBER-produced mol2 file, which we can not load because they use GAFF atom types.
            if oemolistream.GetFormat() == oechem.OEFormat_MOL2:
                self._check_mol2_gaff_atom_type(mol, file_path)

        return mols

    def _smarts_to_networkx(self, substructure_smarts):
        import networkx as nx
        from openeye import oechem

        qmol = oechem.OEQMol()
        if not oechem.OEParseSmiles(qmol, substructure_smarts):
            raise SMILESParseError(f"Error parsing SMARTS '{substructure_smarts}'")

        oechem.OEAssignHybridization(qmol)

        graph = nx.Graph()
        for atom in qmol.GetAtoms():
            atomic_number = atom.GetAtomicNum()

            graph.add_node(
                atom.GetIdx(),
                atomic_number=atomic_number,
                formal_charge=atom.GetFormalCharge(),
                map_index=atom.GetMapIdx(),
            )
        for bond in qmol.GetBonds():
            bond_order = bond.GetOrder()
            if bond_order == 0:
                raise SMILESParseError(f"A bond in '{substructure_smarts} has order 0")

            graph.add_edge(
                bond.GetBgnIdx(),
                bond.GetEndIdx(),
                bond_order=bond_order,
            )
        return graph

    def _assign_aromaticity_and_stereo_from_3d(self, offmol):
        from openeye import oechem

        oemol = offmol.to_openeye()
        oechem.OEPerceiveChiral(oemol)
        oechem.OE3DToInternalStereo(oemol)

        # Aromaticity is re-perceived in this call
        offmol_w_stereo_and_aro = self.from_openeye(oemol, allow_undefined_stereo=True)
        return offmol_w_stereo_and_aro

[docs]    def enumerate_protomers(
        self,
        molecule: "FrozenMolecule",
        max_states: int = 0,
    ) -> list["FrozenMolecule"]:
        """
        Enumerate the formal charges of a molecule to generate different protomers.
        Note that, in cases where the input molecule has an uncommon protonation state
        (for example ``[NH2-]``), the input molecule may not be included in the output.

        Parameters
        ----------
        molecule
            The molecule whose state we should enumerate

        max_states
            The maximum number of protomer states to be returned. If 0, the default,
            attempt to return all protomers.

        Returns
        ------
        molecules
            A list of the protomers of the input molecules, including the input molecule if found
            by Quacpac and not pruned by `max_states`.
        """

        from openeye import oequacpac

        options = oequacpac.OEFormalChargeOptions()
        options.SetMaxCount(max_states)

        return [
            self.from_openeye(
                protomer,
                allow_undefined_stereo=True,
                _cls=molecule.__class__,
            )
            for protomer in oequacpac.OEEnumerateFormalCharges(
                self.to_openeye(molecule=molecule),
                options,
            )
        ]

[docs]    def enumerate_stereoisomers(
        self,
        molecule: "FrozenMolecule",
        undefined_only: bool = False,
        max_isomers: int = 20,
        rationalise: bool = True,
    ) -> list["FrozenMolecule"]:
        """
        Enumerate the stereocenters and bonds of the current molecule.

        Parameters
        ----------
        molecule
            The molecule whose state we should enumerate

        undefined_only
            If we should enumerate all stereocenters and bonds or only those with undefined stereochemistry

        max_isomers
            The maximum amount of molecules that should be returned

        rationalise
            If we should try to build and rationalise the molecule to ensure it can exist


        Returns
        --------
        molecules
            A list of openff.toolkit.topology.Molecule instances

        """
        from openeye import oechem, oeomega

        oemol = self.to_openeye(molecule=molecule)

        # arguments for this function can be found here
        # <https://docs.eyesopen.com/toolkits/python/omegatk/OEConfGenFunctions/OEFlipper.html?highlight=stereoisomers>

        molecules = []
        for isomer in oeomega.OEFlipper(oemol, 200, not undefined_only, True, False):
            if rationalise:
                # try and determine if the molecule is reasonable by generating a conformer with
                # strict stereo, like embedding in rdkit
                omega = oeomega.OEOmega()
                omega.SetMaxConfs(1)
                omega.SetCanonOrder(False)
                # Don't generate random stereoisomer if not specified
                omega.SetStrictStereo(True)
                mol = oechem.OEMol(isomer)
                status = omega(mol)
                if status:
                    isomol = self.from_openeye(mol, _cls=molecule.__class__)
                    if isomol != molecule:
                        molecules.append(isomol)

            else:
                isomol = self.from_openeye(isomer, _cls=molecule.__class__)
                if isomol != molecule:
                    molecules.append(isomol)

        return molecules[:max_isomers]

[docs]    def enumerate_tautomers(
        self, molecule: "FrozenMolecule", max_states: int = 20
    ) -> list["FrozenMolecule"]:
        """
        Enumerate the possible tautomers of the current molecule

        Parameters
        ----------
        molecule
            The molecule whose state we should enumerate

        max_states
            The maximum amount of molecules that should be returned

        Returns
        -------
        molecules
            A list of openff.toolkit.topology.Molecule instances excluding the input molecule.
        """
        from openeye import oequacpac

        oemol = self.to_openeye(molecule=molecule)

        tautomers = []

        # set the options
        tautomer_options = oequacpac.OETautomerOptions()
        tautomer_options.SetApplyWarts(False)
        tautomer_options.SetMaxTautomersGenerated(max_states + 1)
        tautomer_options.SetSaveStereo(True)
        # this aligns the outputs of rdkit and openeye for the example cases
        tautomer_options.SetCarbonHybridization(False)

        for tautomer in oequacpac.OEEnumerateTautomers(oemol, tautomer_options):
            # remove the input tautomer from the output
            taut = self.from_openeye(
                tautomer, allow_undefined_stereo=True, _cls=molecule.__class__
            )
            if taut != molecule:
                tautomers.append(
                    self.from_openeye(
                        tautomer, allow_undefined_stereo=True, _cls=molecule.__class__
                    )
                )

        return tautomers

    @staticmethod
    def _check_mol2_gaff_atom_type(molecule, file_path=None):
        """Attempts to detect the presence of GAFF atom types in a molecule loaded from a mol2 file.

        For now, this raises a ``GAFFAtomTypeWarning`` if the molecule
        include Osmium and Holmium atoms, which have GAFF types OS and
        HO respectively.

        Parameters
        ----------
        molecule
            The loaded molecule.
        file_path
            The path to the mol2 file. This is used exclusively to make
            the error message more meaningful.

        """
        # Handle default.
        if file_path is None:
            file_path = ""
        else:
            # Append a ':' character that will separate the file
            # path from the molecule string representation.
            file_path = file_path + ":"
        # atomic_number: (GAFF_type, element_name)
        warning_atomic_numbers = {76: ("OS", "Osmium"), 67: ("HO", "Holmium")}

        for atom in molecule.atoms:
            try:
                atom_type, element_name = warning_atomic_numbers[atom.atomic_number]
            except KeyError:
                pass
            else:
                import warnings

                warn_msg = (
                    f'OpenEye interpreted the type "{atom_type}" in {file_path}{molecule.name}'
                    f" as {element_name}. Does your mol2 file uses Tripos SYBYL atom types?"
                    " Other atom types such as GAFF are not supported."
                )
                warnings.warn(warn_msg, GAFFAtomTypeWarning, stacklevel=2)

    @staticmethod
    def _openeye_cip_atom_stereochemistry(oemol, oeatom):
        """
        Determine CIP stereochemistry (R/S) for the specified atom

        Parameters
        ----------
        oemol
            The molecule of interest
        oeatom
            The atom whose stereochemistry is to be computed

        Returns
        -------
        stereochemistry
            'R', 'S', or None if no stereochemistry is specified or the atom is not a stereocenter
        """
        from openeye import oechem

        if not oeatom.HasStereoSpecified():
            # No stereochemical information has been stored, so this could be unknown stereochemistry
            # TODO: Should we raise an exception?
            return None

        cip = oechem.OEPerceiveCIPStereo(oemol, oeatom)

        if cip == oechem.OECIPAtomStereo_S:
            return "S"
        elif cip == oechem.OECIPAtomStereo_R:
            return "R"
        elif cip == oechem.OECIPAtomStereo_NotStereo:
            # Not a stereocenter
            # TODO: Should this be a different case from ``None``?
            return None

    @staticmethod
    def _openeye_cip_bond_stereochemistry(oemol, oebond):
        """
        Determine CIP stereochemistry (E/Z) for the specified bond

        Parameters
        ----------
        oemol
            The molecule of interest
        oebond
            The bond whose stereochemistry is to be computed

        Returns
        -------
        stereochemistry
            'E', 'Z', or None if stereochemistry is unspecified or the bond is not a stereo bond

        """
        from openeye import oechem

        if not oebond.HasStereoSpecified():
            # No stereochemical information has been stored, so this could be unknown stereochemistry
            # TODO: Should we raise an exception?
            return None

        cip = oechem.OEPerceiveCIPStereo(oemol, oebond)

        if cip == oechem.OECIPBondStereo_E:
            return "E"
        elif cip == oechem.OECIPBondStereo_Z:
            return "Z"
        elif cip == oechem.OECIPBondStereo_NotStereo:
            return None

[docs]    @staticmethod
    def from_openeye(
        oemol,
        allow_undefined_stereo: bool = False,
        _cls=None,
    ) -> "FrozenMolecule":
        """
        Create a Molecule from an OpenEye molecule. If the OpenEye molecule has
        implicit hydrogens, this function will make them explicit.

        ``OEAtom`` s have a different set of allowed value for partial charges than
        ``openff.toolkit.topology.Molecule`` s. In the OpenEye toolkits, partial charges
        are stored on individual ``OEAtom`` s, and their values are initialized to ``0.0``.
        In the Open Force Field Toolkit, an ``openff.toolkit.topology.Molecule``'s
        ``partial_charges`` attribute is initialized to ``None`` and can be set to a
        unit-wrapped numpy array with units of
        elementary charge. The Open Force
        Field Toolkit considers an ``OEMol`` where every ``OEAtom`` has a partial
        charge of ``float('nan')`` to be equivalent to an Open Force Field Toolkit `Molecule`'s
        ``partial_charges = None``.
        This assumption is made in both ``to_openeye`` and ``from_openeye``.

        .. warning :: This API is experimental and subject to change.

        Parameters
        ----------
        oemol
            An OpenEye molecule
        allow_undefined_stereo
            If false, raises an exception if oemol contains undefined stereochemistry.
        _cls
            Molecule constructor

        Returns
        -------
        molecule
            An OpenFF molecule

        Examples
        --------

        Create a Molecule from an OpenEye OEMol

        >>> from openeye import oechem
        >>> from openff.toolkit._tests.utils import get_data_file_path
        >>> ifs = oechem.oemolistream(get_data_file_path('systems/monomers/ethanol.mol2'))
        >>> oemols = list(ifs.GetOEGraphMols())

        >>> toolkit_wrapper = OpenEyeToolkitWrapper()
        >>> molecule = toolkit_wrapper.from_openeye(oemols[0])

        """
        import math

        from openeye import oechem

        # Save the existing SD tags, if any, since they're lost in the cast to
        # OEMol of the input is OEGraphMol. See issue #1711
        existing_sd_tags = {
            pair.GetTag(): pair.GetValue() for pair in oechem.OEGetSDDataIter(oemol)
        }

        oemol = oechem.OEMol(oemol)

        # Add explicit hydrogens if they're implicit
        if oechem.OEHasImplicitHydrogens(oemol):
            oechem.OEAddExplicitHydrogens(oemol)

        # TODO: Is there any risk to perceiving aromaticity here instead of later?
        oechem.OEAssignAromaticFlags(oemol, oechem.OEAroModel_MDL)

        oechem.OEPerceiveChiral(oemol)

        # Check that all stereo is specified
        # Potentially better OE stereo check: OEFlipper — Toolkits - - Python
        # https: // docs.eyesopen.com / toolkits / python / omegatk / OEConfGenFunctions / OEFlipper.html

        unspec_chiral = False
        unspec_db = False
        problematic_atoms = list()
        problematic_bonds = list()

        for oeatom in oemol.GetAtoms():
            if oeatom.IsChiral():
                if not (oeatom.HasStereoSpecified()):
                    unspec_chiral = True
                    problematic_atoms.append(oeatom)
        for oebond in oemol.GetBonds():
            if oebond.IsChiral():
                if not (oebond.HasStereoSpecified()):
                    unspec_db = True
                    problematic_bonds.append(oebond)
        if unspec_chiral or unspec_db:

            def oeatom_to_str(oeatom) -> str:
                return "atomic num: {}, name: {}, idx: {}, aromatic: {}, chiral: {}".format(
                    oeatom.GetAtomicNum(),
                    oeatom.GetName(),
                    oeatom.GetIdx(),
                    oeatom.IsAromatic(),
                    oeatom.IsChiral(),
                )

            def oebond_to_str(oebond) -> str:
                return "order: {}, chiral: {}".format(
                    oebond.GetOrder(), oebond.IsChiral()
                )

            def describe_oeatom(oeatom) -> str:
                description = "Atom {} with bonds:".format(oeatom_to_str(oeatom))
                for oebond in oeatom.GetBonds():
                    description += "\nbond {} to atom {}".format(
                        oebond_to_str(oebond), oeatom_to_str(oebond.GetNbr(oeatom))
                    )
                return description

            if (
                len(problematic_atoms) != 0 or len(problematic_bonds) != 0
            ) and not allow_undefined_stereo:
                msg = f"OEMol has unspecified stereochemistry. {oemol.GetTitle()=}"

                if len(problematic_atoms) != 0:
                    msg += "Problematic atoms are:\n"
                    for problematic_atom in problematic_atoms:
                        msg += describe_oeatom(problematic_atom) + "\n"

                if len(problematic_bonds) != 0:
                    msg += f"Problematic bonds are: {problematic_bonds}\n"

                raise UndefinedStereochemistryError(
                    f"Unable to make OFFMol from OEMol: {msg}"
                )

        if _cls is None:
            from openff.toolkit.topology.molecule import Molecule

            _cls = Molecule

        molecule = _cls()

        # OEMol.GetTitle() happens to default to an empty string
        if oemol.GetTitle() != "":
            molecule.name = oemol.GetTitle()

        # Attach any SD tag information we saved before casting input to new OEMol
        for key, value in existing_sd_tags.items():
            molecule._properties[key] = value

        off_to_oe_idx = dict()  # {oemol_idx: molecule_idx}
        atom_mapping = {}
        for oeatom in oemol.GetAtoms():
            oe_idx = oeatom.GetIdx()
            map_id = oeatom.GetMapIdx()
            atomic_number = oeatom.GetAtomicNum()
            # Carry with implicit units of elementary charge for faster route through _add_atom
            formal_charge = oeatom.GetFormalCharge()
            explicit_valence = oeatom.GetExplicitValence()
            # Implicit hydrogens are never added to D- and F- block elements,
            # and the MDL valence is always the explicit valence for these
            # elements, so this does not count radical electrons in these blocks.
            mdl_valence = oechem.OEMDLGetValence(
                atomic_number, formal_charge, explicit_valence
            )
            number_radical_electrons = mdl_valence - (
                oeatom.GetImplicitHCount() + explicit_valence
            )

            if number_radical_electrons > 0:
                raise RadicalsNotSupportedError(
                    "The OpenFF Toolkit does not currently support parsing molecules with radicals. "
                    f"Found {number_radical_electrons} radical electrons on molecule "
                    f"{oechem.OECreateSmiString(oemol)}."
                )

            is_aromatic = oeatom.IsAromatic()
            stereochemistry = OpenEyeToolkitWrapper._openeye_cip_atom_stereochemistry(
                oemol, oeatom
            )
            # stereochemistry = self._openeye_cip_atom_stereochemistry(oemol, oeatom)
            name = oeatom.GetName()

            # Transfer in hierarchy metadata
            metadata_dict = dict()
            if oechem.OEHasResidues(oemol):
                metadata_dict["residue_name"] = oechem.OEAtomGetResidue(
                    oeatom
                ).GetName()
                metadata_dict["residue_number"] = oechem.OEAtomGetResidue(
                    oeatom
                ).GetResidueNumber()
                metadata_dict["insertion_code"] = oechem.OEAtomGetResidue(
                    oeatom
                ).GetInsertCode()
                metadata_dict["chain_id"] = oechem.OEAtomGetResidue(oeatom).GetChainID()
            # print('from', metadata_dict)

            atom_index = molecule._add_atom(
                atomic_number,
                formal_charge,
                is_aromatic,
                stereochemistry=stereochemistry,
                name=name,
                metadata=metadata_dict,
                invalidate_cache=False,
            )
            off_to_oe_idx[oe_idx] = (
                atom_index  # store for mapping oeatom to molecule atom indices below
            )
            atom_mapping[atom_index] = map_id

        molecule._invalidate_cached_properties()

        # If we have a full / partial atom map add it to the molecule. Zeroes 0
        # indicates no mapping
        if {*atom_mapping.values()} != {0}:
            molecule._properties["atom_map"] = {
                idx: map_idx for idx, map_idx in atom_mapping.items() if map_idx != 0
            }

        for oebond in oemol.GetBonds():
            atom1_index = off_to_oe_idx[oebond.GetBgnIdx()]
            atom2_index = off_to_oe_idx[oebond.GetEndIdx()]
            bond_order = oebond.GetOrder()
            is_aromatic = oebond.IsAromatic()
            stereochemistry = OpenEyeToolkitWrapper._openeye_cip_bond_stereochemistry(
                oemol, oebond
            )
            if oebond.HasData("fractional_bond_order"):
                fractional_bond_order = oebond.GetData("fractional_bond_order")
            else:
                fractional_bond_order = None

            molecule._add_bond(
                atom1_index,
                atom2_index,
                bond_order,
                is_aromatic=is_aromatic,
                stereochemistry=stereochemistry,
                fractional_bond_order=fractional_bond_order,
                invalidate_cache=False,
            )

        molecule._invalidate_cached_properties()

        # TODO: Copy conformations, if present
        # TODO: Come up with some scheme to know when to import coordinates
        # From SMILES: no
        # From MOL2: maybe
        # From other: maybe
        if hasattr(oemol, "GetConfs"):
            for conf in oemol.GetConfs():
                n_atoms = molecule.n_atoms
                # Store with implicit units until we're sure this conformer exists
                positions = np.zeros(shape=[n_atoms, 3], dtype=np.float64)
                for oe_id in conf.GetCoords().keys():
                    # implicitly in angstrom
                    off_atom_coords = conf.GetCoords()[oe_id]
                    off_atom_index = off_to_oe_idx[oe_id]
                    positions[off_atom_index, :] = off_atom_coords
                all_zeros = not np.any(positions)
                if all_zeros and n_atoms > 1:
                    continue
                molecule._add_conformer(Quantity(positions, unit.angstrom))

        # Store charges with implicit units in this scope
        unitless_charges = np.zeros(shape=molecule.n_atoms, dtype=np.float64)

        # If all OEAtoms have a partial charge of NaN, then the OFFMol should
        # have its partial_charges attribute set to None
        any_partial_charge_is_not_nan = False
        for oe_atom in oemol.GetAtoms():
            oe_idx = oe_atom.GetIdx()
            off_idx = off_to_oe_idx[oe_idx]
            unitless_charge = oe_atom.GetPartialCharge()
            # Once this is True, skip the isnancheck
            if not any_partial_charge_is_not_nan:
                if not math.isnan(unitless_charge):
                    any_partial_charge_is_not_nan = True
            unitless_charges[off_idx] = unitless_charge

        if any_partial_charge_is_not_nan:
            molecule.partial_charges = Quantity(
                unitless_charges, unit.elementary_charge
            )
        else:
            molecule.partial_charges = None

        return molecule

    to_openeye_cache = LRUCache(maxsize=4096)

    @cached(to_openeye_cache, key=_mol_to_ctab_and_aro_key)
    def _connection_table_to_openeye(
        self, molecule, aromaticity_model=DEFAULT_AROMATICITY_MODEL
    ):
        from openeye import oechem

        if aromaticity_model not in ALLOWED_AROMATICITY_MODELS:
            raise InvalidAromaticityModelError(
                f"Given aromaticity model {aromaticity_model} which is not in the set of allowed aromaticity models: "
                f"{ALLOWED_AROMATICITY_MODELS}"
            )

        oemol = oechem.OEMol()
        # Add atoms
        off_to_oe_idx = {}  # {off_idx : oe_idx}
        oemol_atoms = list()  # list of corresponding oemol atoms
        for atom in molecule.atoms:
            oeatom = oemol.NewAtom(atom.atomic_number)
            oeatom.SetFormalCharge(atom.formal_charge.m_as(unit.elementary_charge))
            # TODO: Do we want to provide _any_ pathway for Atom.is_aromatic to influence the OEMol?
            # oeatom.SetAromatic(atom.is_aromatic)
            oemol_atoms.append(oeatom)
            off_to_oe_idx[atom.molecule_atom_index] = oeatom.GetIdx()

        # Add bonds
        oemol_bonds = list()  # list of corresponding oemol bonds
        for bond in molecule.bonds:
            atom1_index = bond.atom1_index
            atom2_index = bond.atom2_index
            oebond = oemol.NewBond(oemol_atoms[atom1_index], oemol_atoms[atom2_index])
            oebond.SetOrder(bond.bond_order)
            # TODO: Do we want to provide _any_ pathway for Bond.is_aromatic to influence the OEMol?
            # oebond.SetAromatic(bond.is_aromatic)
            oemol_bonds.append(oebond)

        if aromaticity_model == "OEAroModel_MDL":
            oechem.OEAssignAromaticFlags(oemol, oechem.OEAroModelMDL)
        else:
            raise InvalidAromaticityModelError(
                "Aromaticity model {aromaticity_model} is not in the set of allowed aromaticity models:  "
                f"{ALLOWED_AROMATICITY_MODELS}"
            )

        # Set atom stereochemistry now that all connectivity is in place
        for atom, oeatom in zip(molecule.atoms, oemol_atoms):
            if not atom.stereochemistry:
                continue

            # Set arbitrary initial stereochemistry
            neighs = [n for n in oeatom.GetAtoms()]
            oeatom.SetStereo(
                neighs, oechem.OEAtomStereo_Tetra, oechem.OEAtomStereo_Right
            )

            # Flip chirality if stereochemistry is incorrect
            oeatom_stereochemistry = (
                OpenEyeToolkitWrapper._openeye_cip_atom_stereochemistry(oemol, oeatom)
            )
            if oeatom_stereochemistry != atom.stereochemistry:
                # Flip the stereochemistry
                oeatom.SetStereo(
                    neighs, oechem.OEAtomStereo_Tetra, oechem.OEAtomStereo_Left
                )
                # Verify it matches now as a sanity check
                oeatom_stereochemistry = (
                    OpenEyeToolkitWrapper._openeye_cip_atom_stereochemistry(
                        oemol, oeatom
                    )
                )
                if oeatom_stereochemistry != atom.stereochemistry:
                    raise InconsistentStereochemistryError(
                        "Programming error: OpenEye atom stereochemistry assumptions failed. "
                        f"The atom in the oemol has stereochemistry {oeatom_stereochemistry} and "
                        f"the atom in the offmol has stereochemistry {atom.stereochemistry}."
                    )

        # Set bond stereochemistry
        for bond, oebond in zip(molecule.bonds, oemol_bonds):
            if not bond.stereochemistry:
                continue

            atom1_index = bond.molecule.atoms.index(bond.atom1)
            atom2_index = bond.molecule.atoms.index(bond.atom2)
            # Set arbitrary initial stereochemistry
            oeatom1, oeatom2 = oemol_atoms[atom1_index], oemol_atoms[atom2_index]
            oeatom1_neighbor = [n for n in oeatom1.GetAtoms() if not n == oeatom2][0]
            oeatom2_neighbor = [n for n in oeatom2.GetAtoms() if not n == oeatom1][0]
            # oebond.SetStereo([oeatom1, oeatom2], oechem.OEBondStereo_CisTrans, oechem.OEBondStereo_Cis)
            oebond.SetStereo(
                [oeatom1_neighbor, oeatom2_neighbor],
                oechem.OEBondStereo_CisTrans,
                oechem.OEBondStereo_Cis,
            )

            # Flip stereochemistry if incorrect
            oebond_stereochemistry = (
                OpenEyeToolkitWrapper._openeye_cip_bond_stereochemistry(oemol, oebond)
            )
            if oebond_stereochemistry != bond.stereochemistry:
                # Flip the stereochemistry
                oebond.SetStereo(
                    [oeatom1_neighbor, oeatom2_neighbor],
                    oechem.OEBondStereo_CisTrans,
                    oechem.OEBondStereo_Trans,
                )
                # Verify it matches now as a sanity check
                oebond_stereochemistry = (
                    OpenEyeToolkitWrapper._openeye_cip_bond_stereochemistry(
                        oemol, oebond
                    )
                )
                if oebond_stereochemistry != bond.stereochemistry:
                    raise InconsistentStereochemistryError(
                        "Programming error: OpenEye bond stereochemistry assumptions failed. "
                        f"The bond in the oemol has stereochemistry {oebond_stereochemistry} and "
                        f"the bond in the offmol has stereochemistry {bond.stereochemistry}."
                    )

        # Clean Up phase
        # The only feature of a molecule that wasn't perceived above seemed to be ring connectivity, better to run it
        # here then for someone to inquire about ring sizes and get 0 when it shouldn't be
        oechem.OEFindRingAtomsAndBonds(oemol)

        return oemol, off_to_oe_idx

[docs]    def to_openeye(
        self,
        molecule: "FrozenMolecule",
        aromaticity_model: str = DEFAULT_AROMATICITY_MODEL,
    ):
        """
        Create an OpenEye molecule using the specified aromaticity model

        ``OEAtom`` s have a different set of allowed value for partial
        charges than ``openff.toolkit.topology.Molecule``\ s. In the
        OpenEye toolkits, partial charges are stored on individual
        ``OEAtom``\ s, and their values are initialized to ``0.0``. In
        the Open Force Field Toolkit, an``openff.toolkit.topology.Molecule``'s
        ``partial_charges`` attribute is initialized to ``None`` and can
        be set to a unit-wrapped numpy array with
        units of elementary charge. The Open Force Field Toolkit
        considers an ``OEMol`` where every ``OEAtom`` has a partial
        charge of ``float('nan')`` to be equivalent to an Open Force
        Field Toolkit ``Molecule``'s ``partial_charges = None``. This
        assumption is made in both ``to_openeye`` and ``from_openeye``.

        .. todo ::

           * Should the aromaticity model be specified in some other way?

        .. warning :: This API is experimental and subject to change.

        Parameters
        ----------
        molecule
            The molecule to convert to an OEMol
        aromaticity_model
            The aromaticity model to use

        Returns
        -------
        oemol
            An OpenEye molecule

        Examples
        --------

        Create an OpenEye molecule from a Molecule

        >>> from openff.toolkit import Molecule
        >>> toolkit_wrapper = OpenEyeToolkitWrapper()
        >>> molecule = Molecule.from_smiles('CC')
        >>> oemol = toolkit_wrapper.to_openeye(molecule)

        """
        from openeye import oechem

        oemol, off_to_oe_idx = self._connection_table_to_openeye(
            molecule, aromaticity_model=aromaticity_model
        )
        oemol = oechem.OEMol(oemol)
        # if not(molecule.name is None):
        oe_to_off_idx = dict([(j, i) for i, j in off_to_oe_idx.items()])

        oemol.SetTitle(molecule.name)
        # Make lists of OE atoms and OE bonds in the same order as the OFF atoms and OFF bonds
        oemol_atoms = [None] * molecule.n_atoms  # list of corresponding oemol atoms
        for oe_atom in oemol.GetAtoms():
            oe_idx = oe_atom.GetIdx()
            oemol_atoms[oe_to_off_idx[oe_idx]] = oe_atom
            off_atom = molecule.atoms[oe_to_off_idx[oe_idx]]
            oe_atom.SetName(off_atom.name)

            if off_atom.partial_charge is None:
                oe_atom.SetPartialCharge(float("nan"))
            else:
                charge = off_atom.partial_charge.m_as(unit.elementary_charge)
                oe_atom.SetPartialCharge(float(charge))
            res = oechem.OEAtomGetResidue(oe_atom)
            # If we add residue info without updating the serial number, all of the atom
            # serial numbers in a written PDB will be 0. Note two things:
            # 1) the "res" object is specific to this atom, so its serial number
            #    applies only to this atom
            # 2) we do NOT preserve
            #    PDB serial numbers in our infrastructure, we merely set these to the
            #    atom index in the molecule so that OpenEye-written PDBs have
            #    nonzero atom serial numbers.
            res.SetSerialNumber(oe_to_off_idx[oe_idx] + 1)

            if "residue_name" in off_atom.metadata:
                res.SetName(off_atom.metadata["residue_name"])
            else:
                res.SetName("UNL")

            if "residue_number" in off_atom.metadata:
                res.SetResidueNumber(int(off_atom.metadata["residue_number"]))
            else:
                res.SetResidueNumber(1)

            if "insertion_code" in off_atom.metadata:
                res.SetInsertCode(off_atom.metadata["insertion_code"])
            else:
                res.SetInsertCode(" ")

            if "chain_id" in off_atom.metadata:
                res.SetChainID(off_atom.metadata["chain_id"])
            else:
                res.SetChainID(" ")

            oechem.OEAtomSetResidue(oe_atom, res)

        assert None not in oemol_atoms

        oemol_bonds = [None] * molecule.n_bonds  # list of corresponding oemol bonds
        for oe_bond in oemol.GetBonds():
            at1_off_idx = oe_to_off_idx[oe_bond.GetBgnIdx()]
            at2_off_idx = oe_to_off_idx[oe_bond.GetEndIdx()]
            off_bond = molecule.get_bond_between(at1_off_idx, at2_off_idx)
            off_bond_idx = off_bond.molecule_bond_index
            oemol_bonds[off_bond_idx] = oe_bond
            if off_bond.fractional_bond_order is not None:
                oe_bond.SetData("fractional_bond_order", off_bond.fractional_bond_order)

        assert None not in oemol_bonds

        # Retain conformations, if present
        if molecule._conformers is not None:
            oemol.DeleteConfs()
            for conf in molecule._conformers:
                # OE needs a 1 x (3*n_Atoms) double array as input
                flat_coords = np.zeros(shape=oemol.NumAtoms() * 3, dtype=np.float64)
                for index, oe_idx in off_to_oe_idx.items():
                    (x, y, z) = conf[index, :].m_as(unit.angstrom)
                    flat_coords[(3 * oe_idx)] = x
                    flat_coords[(3 * oe_idx) + 1] = y
                    flat_coords[(3 * oe_idx) + 2] = z

                oecoords = oechem.OEFloatArray(flat_coords)
                oemol.NewConf(oecoords)

        # Retain charges, if present. All atoms are initialized above with a partial charge of NaN.
        if molecule.partial_charges is not None:
            oe_indexed_charges = np.zeros(shape=molecule.n_atoms, dtype=np.float64)
            assert hasattr(molecule.partial_charges, "units")
            for off_idx, charge in enumerate(molecule.partial_charges):
                oe_idx = off_to_oe_idx[off_idx]
                charge_unitless = charge.m_as(unit.elementary_charge)
                oe_indexed_charges[oe_idx] = charge_unitless
            # TODO: This loop below fails if we try to use an "enumerate"-style loop.
            #  It's worth investigating whether we make this assumption elsewhere in the codebase, since
            #  the OE docs may indicate that this sort of usage is a very bad thing to do.
            #  https://docs.eyesopen.com/toolkits/python/oechemtk/atombondindices.html#indices-for-molecule-lookup-considered-harmful
            # for oe_idx, oe_atom in enumerate(oemol.GetAtoms()):
            for oe_atom in oemol.GetAtoms():
                oe_idx = oe_atom.GetIdx()
                oe_atom.SetPartialCharge(oe_indexed_charges[oe_idx])

        # Retain properties, if present
        for key, value in molecule.properties.items():
            oechem.OESetSDData(oemol, str(key), str(value))

        return oemol

[docs]    def atom_is_in_ring(self, atom: "Atom") -> bool:
        """Return whether or not an atom is in a ring.

        It is assumed that this atom is in molecule.

        Parameters
        ----------
        atom
            The molecule containing the atom of interest

        Returns
        -------
        is_in_ring
            Whether or not the atom is in a ring.

        Raises
        ------
        NotAttachedToMoleculeError
        """
        if atom.molecule is None:
            raise NotAttachedToMoleculeError(
                "This Atom does not belong to a Molecule object"
            )

        molecule = atom.molecule
        atom_index = atom.molecule_atom_index

        oemol = molecule.to_openeye()

        # Molecule.to_openeye() is guaranteed to preserve atom ordering
        is_in_ring = [*oemol.GetAtoms()][atom_index].IsInRing()

        return is_in_ring

[docs]    def bond_is_in_ring(self, bond: "Bond") -> bool:
        """Return whether or not a bond is in a ring.

        It is assumed that this atom is in molecule.

        Parameters
        ----------
        bond
            The molecule containing the atom of interest

        Returns
        -------
        is_in_ring
            Whether or not the bond of index `bond_index` is in a ring

        Raises
        ------
        NotAttachedToMoleculeError
        """
        if bond.molecule is None:
            raise NotAttachedToMoleculeError(
                "This Bond does not belong to a Molecule object"
            )

        molecule = bond.molecule

        oemol = molecule.to_openeye()

        # Molecule.to_openeye() is NOT guaranteed to preserve bond ordering,
        # so we must look up the corresponding `OEBond` via `OEAtom`s
        oebond = oemol.GetBond(
            [*oemol.GetAtoms()][bond.atom1_index],
            [*oemol.GetAtoms()][bond.atom2_index],
        )

        is_in_ring = oebond.IsInRing()

        return is_in_ring

    def _get_smiles_flavor(self, isomeric: bool, explicit_hydrogens: bool) -> int:
        from openeye import oechem

        # this sets up the default settings following the old DEFAULT flag
        # more information on flags can be found here
        # <https://docs.eyesopen.com/toolkits/python/oechemtk/OEChemConstants/OESMILESFlag.html#OEChem::OESMILESFlag>
        smiles_options = (
            oechem.OESMILESFlag_Canonical
            | oechem.OESMILESFlag_Isotopes
            | oechem.OESMILESFlag_RGroups
        )

        # check if we want an isomeric smiles
        if isomeric:
            # add the atom and bond stereo flags
            smiles_options |= (
                oechem.OESMILESFlag_AtomStereo | oechem.OESMILESFlag_BondStereo
            )

        if explicit_hydrogens:
            # add the hydrogen flag
            smiles_options |= oechem.OESMILESFlag_Hydrogens

        return smiles_options

[docs]    def to_smiles(
        self,
        molecule: "Molecule",
        isomeric: bool = True,
        explicit_hydrogens: bool = True,
        mapped: bool = False,
    ) -> str:
        """
        Uses the OpenEye toolkit to convert a Molecule into a SMILES string.
        A partially mapped smiles can also be generated for atoms of interest by supplying an `atom_map` to the
        properties dictionary.

        Parameters
        ----------
        molecule
            The molecule to convert into a SMILES.
        isomeric
            return an isomeric smiles
        explicit_hydrogens
            return a smiles string containing all hydrogens explicitly
        mapped
            return a explicit hydrogen mapped smiles, the atoms to be mapped can be controlled by supplying an
            atom map into the properties dictionary. If no mapping is passed all atoms will be mapped in order, else
            an atom map dictionary from the current atom index to the map id should be supplied with no duplicates.
            The map ids (values) should start from 0 or 1.

        Returns
        -------
        smiles
            The SMILES of the input molecule.
        """
        from openeye import oechem

        oemol = self.to_openeye(molecule)

        smiles_options = self._get_smiles_flavor(isomeric, explicit_hydrogens)

        if mapped:
            assert explicit_hydrogens is True, (
                "Mapped smiles require all hydrogens and "
                "stereochemsitry to be defined to retain order"
            )

            # if we only want to map specific atoms check for an atom map
            atom_map = molecule._properties.get("atom_map", None)
            if atom_map is not None:
                # make sure there are no repeated indices
                map_ids = set(atom_map.values())
                if len(map_ids) < len(atom_map):
                    atom_map = None
                elif 0 in atom_map.values():
                    # we need to increment the map index
                    for atom, map in atom_map.items():
                        atom_map[atom] = map + 1

            if atom_map is None:
                # now we need to add the atom map to the atoms
                for oeatom in oemol.GetAtoms():
                    oeatom.SetMapIdx(oeatom.GetIdx() + 1)
            else:
                for atom in oemol.GetAtoms():
                    try:
                        # try to set the atom map
                        map_idx = atom_map[atom.GetIdx()]
                        atom.SetMapIdx(map_idx)
                    except KeyError:
                        continue

            smiles_options |= oechem.OESMILESFlag_AtomMaps

        smiles = oechem.OECreateSmiString(oemol, smiles_options)
        return smiles

[docs]    def to_inchi(self, molecule: "Molecule", fixed_hydrogens: bool = False) -> str:
        """
        Create an InChI string for the molecule using the OpenEye OEChem Toolkit.
        InChI is a standardised representation that does not capture tautomers
        unless specified using the fixed hydrogen layer.

        For information on InChi see here https://iupac.org/who-we-are/divisions/division-details/inchi/

        Parameters
        ----------
        molecule
            The molecule to convert into a SMILES.

        fixed_hydrogens
            If a fixed hydrogen layer should be added to the InChI, if `True` this
            will produce a non standard specific InChI string of the molecule.

        Returns
        --------
        inchi: str
            The InChI string of the molecule.
        """

        from openeye import oechem

        oemol = self.to_openeye(molecule)

        if fixed_hydrogens:
            opts = oechem.OEInChIOptions()
            opts.SetFixedHLayer(True)
            inchi = oechem.OEMolToInChI(oemol)

        else:
            inchi = oechem.OEMolToSTDInChI(oemol)

        return inchi

[docs]    def to_inchikey(self, molecule: "Molecule", fixed_hydrogens: bool = False) -> str:
        """
        Create an InChIKey for the molecule using the OpenEye OEChem Toolkit.
        InChIKey is a standardised representation that does not capture tautomers
        unless specified using the fixed hydrogen layer.

        For information on InChi see here https://iupac.org/who-we-are/divisions/division-details/inchi/

        Parameters
        ----------
        molecule
            The molecule to convert into a SMILES.

        fixed_hydrogens
            If a fixed hydrogen layer should be added to the InChI, if `True` this will produce a non standard specific
            InChI string of the molecule.

        Returns
        --------
        inchi_key
            The InChIKey representation of the molecule.
        """

        from openeye import oechem

        oemol = self.to_openeye(molecule)

        if fixed_hydrogens:
            opts = oechem.OEInChIOptions()
            opts.SetFixedHLayer(True)
            inchi_key = oechem.OEMolToInChIKey(oemol)

        else:
            inchi_key = oechem.OEMolToSTDInChIKey(oemol)

        return inchi_key

[docs]    def to_iupac(self, molecule: "Molecule") -> str:
        """Generate IUPAC name from Molecule

        Parameters
        ----------
        molecule
            The molecule to convert into a SMILES.

        Returns
        -------
        iupac_name
            IUPAC name of the molecule

        Examples
        --------

        >>> from openff.toolkit import Molecule
        >>> from openff.toolkit.utils import get_data_file_path
        >>> sdf_filepath = get_data_file_path('molecules/ethanol.sdf')
        >>> molecule = Molecule(sdf_filepath)
        >>> toolkit = OpenEyeToolkitWrapper()
        >>> iupac_name = toolkit.to_iupac(molecule)

        """
        from openeye import oeiupac

        oemol = self.to_openeye(molecule)

        return oeiupac.OECreateIUPACName(oemol)

[docs]    def canonical_order_atoms(self, molecule: "FrozenMolecule") -> "FrozenMolecule":
        """
        Canonical order the atoms in the molecule using the OpenEye toolkit.

        Parameters
        ----------
        molecule
            The input molecule

         Returns
        -------
        molecule
            The input molecule, with canonically-indexed atoms and bonds.
        """

        from openeye import oechem

        oemol = self.to_openeye(molecule)

        oechem.OECanonicalOrderAtoms(oemol)
        oechem.OECanonicalOrderBonds(oemol)

        # reorder the iterator
        vatm = []
        for atom in oemol.GetAtoms():
            if atom.GetAtomicNum() != oechem.OEElemNo_H:
                vatm.append(atom)
        oemol.OrderAtoms(vatm)

        vbnd = []
        for bond in oemol.GetBonds():
            if (
                bond.GetBgn().GetAtomicNum() != oechem.OEElemNo_H
                and bond.GetEnd().GetAtomicNum() != oechem.OEElemNo_H
            ):
                vbnd.append(bond)
        oemol.OrderBonds(vbnd)

        oemol.Sweep()

        for bond in oemol.GetBonds():
            if bond.GetBgnIdx() > bond.GetEndIdx():
                bond.SwapEnds()

        return self.from_openeye(
            oemol, allow_undefined_stereo=True, _cls=molecule.__class__
        )

[docs]    def from_smiles(
        self,
        smiles: str,
        hydrogens_are_explicit: bool = False,
        allow_undefined_stereo: bool = False,
        _cls=None,
        name: str = "",
    ) -> "FrozenMolecule":
        """
        Create a Molecule from a SMILES string using the OpenEye toolkit.

        .. warning :: This API is experimental and subject to change.

        Parameters
        ----------
        smiles
            The SMILES string to turn into a molecule
        hydrogens_are_explicit
            If False, OE will perform hydrogen addition using OEAddExplicitHydrogens
        allow_undefined_stereo
            Whether to accept SMILES with undefined stereochemistry. If False,
            an exception will be raised if a SMILES with undefined stereochemistry
            is passed into this function.
        _cls
            Molecule constructor
        name
            An optional name for the output molecule

        Returns
        -------
        molecule
            An OpenFF style molecule.

        Raises
        ------
        RadicalsNotSupportedError
        """
        from openeye import oechem

        oemol = oechem.OEGraphMol()
        if not oechem.OESmilesToMol(oemol, smiles):
            raise SMILESParseError("Unable to parse the SMILES string")
        if not (hydrogens_are_explicit):
            result = oechem.OEAddExplicitHydrogens(oemol)
            if not result:
                raise ValueError(
                    "Addition of explicit hydrogens failed in from_openeye"
                )
        elif hydrogens_are_explicit and oechem.OEHasImplicitHydrogens(oemol):
            raise ValueError(
                f"'hydrogens_are_explicit' was specified as True, but OpenEye Toolkit interpreted "
                f"SMILES '{smiles}' as having implicit hydrogen. If this SMILES is intended to "
                f"express all explicit hydrogens in the molecule, then you should construct the "
                f"desired molecule as an OEMol (where oechem.OEHasImplicitHydrogens(oemol) returns "
                f"False), and then use Molecule.from_openeye() to create the desired OFFMol."
            )

        # Set partial charges to None, since they couldn't have been stored in a SMILES
        for atom in oemol.GetAtoms():
            atom.SetPartialCharge(float("nan"))

        molecule = self.from_openeye(
            oemol,
            _cls=_cls,
            allow_undefined_stereo=allow_undefined_stereo,
        )
        molecule.name = name
        return molecule

[docs]    def from_inchi(
        self,
        inchi: str,
        allow_undefined_stereo: bool = False,
        _cls=None,
        name: str = "",
    ) -> "FrozenMolecule":
        """
        Construct a Molecule from a InChI representation

        Parameters
        ----------
        inchi
            The InChI representation of the molecule.
        allow_undefined_stereo
            Whether to accept InChI with undefined stereochemistry. If False,
            an exception will be raised if a InChI with undefined stereochemistry
            is passed into this function.
        _cls
            Molecule constructor
        name
            An optional name for the output molecule

        Returns
        -------
        molecule
        """

        from openeye import oechem

        # This calls the same functions as OESmilesToMol
        oemol = oechem.OEGraphMol()
        oechem.OEInChIToMol(oemol, inchi)

        # try and catch InChI parsing fails
        # if there are no atoms don't build the molecule
        if oemol.NumAtoms() == 0:
            raise InChIParseError(
                f"There was an issue parsing the InChI string ({inchi}), please check and try again."
            )

        molecule = self.from_openeye(
            oemol,
            allow_undefined_stereo=allow_undefined_stereo,
            _cls=_cls,
        )
        molecule.name = name

        return molecule

[docs]    def from_iupac(
        self,
        iupac_name: str,
        allow_undefined_stereo: bool = False,
        _cls=None,
        **kwargs,
    ) -> "FrozenMolecule":
        """
        Construct a Molecule from an IUPAC name

        Parameters
        ----------
        iupac_name
            The IUPAC or common name of the molecule.
        allow_undefined_stereo
            Whether to accept a molecule name with undefined stereochemistry. If False,
            an exception will be raised if a molecule name with undefined stereochemistry
            is passed into this function.
        _cls
            Molecule constructor

        Returns
        -------
        molecule

        """
        from openeye import oechem, oeiupac

        oemol = oechem.OEMol()
        parsing_result = oeiupac.OEParseIUPACName(oemol, iupac_name)
        if not parsing_result:
            raise InvalidIUPACNameError(
                f"OpenEye failed to parse {iupac_name} as a IUPAC name"
            )
        oechem.OETriposAtomNames(oemol)
        result = oechem.OEAddExplicitHydrogens(oemol)
        if not result:
            raise OpenEyeError("Addition of explicit hydrogens failed in from_iupac")

        molecule = self.from_openeye(
            oemol, allow_undefined_stereo=allow_undefined_stereo, _cls=_cls, **kwargs
        )

        return molecule

[docs]    def generate_conformers(
        self,
        molecule: "Molecule",
        n_conformers: int = 1,
        rms_cutoff: Optional[Quantity] = None,
        clear_existing: bool = True,
        make_carboxylic_acids_cis: bool = False,
    ):
        """
        Generate molecule conformers using OpenEye Omega.

        .. warning :: This API is experimental and subject to change.

        .. todo ::

            * which parameters should we expose? (or can we implement a general system with \*\*kwargs?)
            * will the coordinates be returned in the OpenFF Molecule's own indexing system? Or is there a chance that
              they'll get reindexed when we convert the input into an OEmol?

        Parameters
        ----------
        molecule
            The molecule to generate conformers for.
        n_conformers
            The maximum number of conformers to generate.
        rms_cutoff
            The minimum RMS value at which two conformers are considered redundant and one is deleted.
            If None, the cutoff is set to 1 Angstrom
        clear_existing
            Whether to overwrite existing conformers for the molecule
        make_carboxylic_acids_cis
            Guarantee all conformers have exclusively cis carboxylic acid groups (COOH)
            by rotating the proton in any trans carboxylic acids 180 degrees around the C-O bond.
        """
        import copy

        from openeye import oeomega

        # Copy the molecule and scrub the conformers so that omega HAS to read stereo from graph mol
        # See https://github.com/openforcefield/openff-toolkit/issues/1152
        mol_copy = copy.deepcopy(molecule)
        mol_copy._conformers = None

        oemol = self.to_openeye(mol_copy)
        omega = oeomega.OEOmega()
        omega.SetMaxConfs(n_conformers)
        omega.SetCanonOrder(False)
        omega.SetSampleHydrogens(True)
        omega.SetEnergyWindow(15.0)  # unit?
        if rms_cutoff is None:
            omega.SetRMSThreshold(1.0)
        else:
            omega.SetRMSThreshold(rms_cutoff.m_as(unit.angstrom))
        # Don't generate random stereoisomer if not specified
        omega.SetStrictStereo(True)
        status = omega(oemol)

        if status is False:
            omega.SetStrictStereo(False)
            new_status = omega(oemol)
            if new_status is False:
                raise ConformerGenerationError(
                    "OpenEye Omega conformer generation failed"
                )

        molecule2 = self.from_openeye(
            oemol, allow_undefined_stereo=True, _cls=molecule.__class__
        )

        if clear_existing:
            molecule._conformers = list()

        for conformer in molecule2._conformers:  # type: ignore[union-attr]
            molecule._add_conformer(conformer)

        if make_carboxylic_acids_cis:
            molecule._make_carboxylic_acids_cis(toolkit_registry=self)

[docs]    def apply_elf_conformer_selection(
        self,
        molecule: "Molecule",
        percentage: float = 2.0,
        limit: int = 10,
    ):
        """Applies the `ELF method
        <https://docs.eyesopen.com/toolkits/python/quacpactk/molchargetheory.html#elf-conformer-selection>`_
        to select a set of diverse
        conformers which have minimal electrostatically strongly interacting functional
        groups from a molecules conformers.

        Notes
        -----
        * The input molecule should have a large set of conformers already
          generated to select the ELF conformers from.
        * The selected conformers will be retained in the ``molecule.conformers`` list
          while unselected conformers will be discarded.
        * Conformers generated with the OpenEye toolkit often include trans
          carboxylic acids (COOH). These are unphysical and will be rejected by
          ``apply_elf_conformer_selection``. If no conformers are selected, try
          re-running ``generate_conformers`` with the ``make_carboxylic_acids_cis``
          argument set to ``True``

        See Also
        --------
        RDKitToolkitWrapper.apply_elf_conformer_selection

        Parameters
        ----------
        molecule
            The molecule which contains the set of conformers to select from.
        percentage
            The percentage of conformers with the lowest electrostatic interaction
            energies to greedily select from.
        limit
            The maximum number of conformers to select.
        """

        from openeye import oechem, oequacpac

        if molecule.n_conformers == 0:
            return

        oe_molecule = molecule.to_openeye()

        # Select a subset of the OMEGA generated conformers using the ELF10 method.
        oe_elf_options = oequacpac.OEELFOptions()
        oe_elf_options.SetElfLimit(limit)
        oe_elf_options.SetPercent(percentage)

        oe_elf = oequacpac.OEELF(oe_elf_options)

        output_stream = oechem.oeosstream()

        oechem.OEThrow.SetOutputStream(output_stream)
        oechem.OEThrow.Clear()

        status = oe_elf.Select(oe_molecule)

        oechem.OEThrow.SetOutputStream(oechem.oeerr)

        output_string = output_stream.str().decode("UTF-8")
        output_string = output_string.replace("Warning: ", "")
        output_string = re.sub("^: +", "", output_string, flags=re.MULTILINE)
        output_string = re.sub("\n$", "", output_string)

        # Check to make sure the call to OE was succesful, and re-route any
        # non-fatal warnings to the correct logger.
        if output_string and not status:
            raise RuntimeError("\n" + output_string)
        elif not status:
            raise RuntimeError(
                "OpenEye failed to select conformers, but did not return any output. "
                "This most commonly occurs when the Molecule does not have enough conformers to "
                "select from. Try calling Molecule.apply_elf_conformer_selection() again after "
                "running Molecule.generate_conformers() with a much larger value of n_conformers "
                "or with make_carboxylic_acids_cis=True."
            )
        elif output_string:
            logger.warning(output_string)

        # Extract and store the ELF conformers on the input molecule.
        conformers = []

        for oe_conformer in oe_molecule.GetConfs():
            conformer = np.zeros((oe_molecule.NumAtoms(), 3))

            for atom_index, coordinates in oe_conformer.GetCoords().items():
                conformer[atom_index, :] = coordinates

            conformers.append(Quantity(conformer, unit.angstrom))

        molecule._conformers = conformers

[docs]    def assign_partial_charges(
        self,
        molecule: "Molecule",
        partial_charge_method: Optional[str] = None,
        use_conformers: Optional[list[Quantity]] = None,
        strict_n_conformers: bool = False,
        normalize_partial_charges: bool = True,
        _cls=None,
    ):
        """
        Compute partial charges with OpenEye quacpac, and assign
        the new values to the partial_charges attribute.

        .. warning :: This API is experimental and subject to change.

        .. todo ::

           * Should the default be ELF?
           * Can we expose more charge models?


        Parameters
        ----------
        molecule
            Molecule for which partial charges are to be computed
        partial_charge_method
            The charge model to use. One of ['amberff94', 'mmff', 'mmff94', 'am1-mulliken', 'am1bcc',
            'am1bccnosymspt', 'am1bccelf10', 'gasteiger']
            If None, 'am1-mulliken' will be used.
        use_conformers
            shape (n_atoms, 3) and dimension of distance. Optional, default = None
            Coordinates to use for partial charge calculation. If None, an appropriate number
            of conformers will be generated.
        strict_n_conformers
            Whether to raise an exception if an invalid number of conformers is provided for the
            given charge method.
            If this is False and an invalid number of conformers is found, a warning will be raised.
        normalize_partial_charges
            Whether to offset partial charges so that they sum to the total formal charge of the molecule.
            This is used to prevent accumulation of rounding errors when the partial charge generation method has
            low precision.
        _cls
            Molecule constructor

        Raises
        ------
        ChargeMethodUnavailableError if the requested charge method can not be handled by this toolkit

        ChargeCalculationError if the charge method is supported by this toolkit, but fails
        """

        import numpy as np
        from openeye import oechem, oequacpac

        from openff.toolkit.topology import Molecule

        if partial_charge_method is None:
            partial_charge_method = "am1-mulliken"

        partial_charge_method = partial_charge_method.lower()

        if partial_charge_method not in self._supported_charge_methods:
            raise ChargeMethodUnavailableError(
                f"partial_charge_method '{partial_charge_method}' is not available from OpenEyeToolkitWrapper. "
                f"Available charge methods are {self.supported_charge_methods}"
            )

        charge_method: _ChargeSettings = self._supported_charge_methods[
            partial_charge_method
        ]

        if _cls is None:
            _cls = Molecule

        # Make a temporary copy of the molecule, since we'll be messing with its conformers
        mol_copy = _cls(molecule)

        if use_conformers is None:
            if charge_method["rec_confs"] == 0:
                mol_copy._conformers = None
            else:
                self.generate_conformers(
                    mol_copy,
                    n_conformers=charge_method["rec_confs"],
                    rms_cutoff=0.25 * unit.angstrom,
                    make_carboxylic_acids_cis=True,
                )
                # TODO: What's a "best practice" RMS cutoff to use here?
        else:
            mol_copy._conformers = None
            for conformer in use_conformers:
                mol_copy._add_conformer(conformer)
            self._check_n_conformers(
                mol_copy,
                partial_charge_method=partial_charge_method,
                min_confs=charge_method["min_confs"],
                max_confs=charge_method["max_confs"],
                strict_n_conformers=strict_n_conformers,
            )

        oemol = mol_copy.to_openeye()

        errfs = oechem.oeosstream()
        oechem.OEThrow.SetOutputStream(errfs)
        oechem.OEThrow.Clear()

        oe_charge_method = getattr(oequacpac, charge_method["oe_charge_method"])

        # The OpenFF toolkit has always supported a version of AM1BCC with no geometry optimization
        # or symmetry correction. So we include this keyword to provide a special configuration of quacpac
        # if requested.
        if partial_charge_method == "am1bccnosymspt":
            kwargs = {
                "optimize": False,
                "symmetrize": False,
            }
        elif partial_charge_method in ["gasteiger", "mmff94", "am1bccelf10"]:
            # symmetrize is implicit in gasteiger and mmff and is already set to True in am1bccelf10
            kwargs = dict()
        else:
            kwargs = {"symmetrize": True}

        if partial_charge_method == "am1elf10":
            # special case
            charge_settings = oequacpac.OEELFCharges(
                oequacpac.OEAM1Charges(
                    optimize=True,
                    symmetrize=True,
                ),
                10,
            )

        else:
            charge_settings = oe_charge_method(**kwargs)

        quacpac_status = oequacpac.OEAssignCharges(
            oemol,
            charge_settings,
        )

        oechem.OEThrow.SetOutputStream(oechem.oeerr)  # restoring to original state
        # This logic handles errors encountered in #34, which can occur when using ELF10 conformer selection
        if not quacpac_status:
            oe_charge_engine = (
                oequacpac.OEAM1Charges
                if partial_charge_method == "am1elf10"
                else oequacpac.OEAM1BCCCharges
            )

            if "SelectElfPop: issue with removing trans COOH conformers" in (
                errfs.str().decode("UTF-8")
            ):
                logger.warning(
                    f"Warning: charge assignment involving ELF10 conformer selection failed due to a "
                    f"known bug (toolkit issue #346). Downgrading to {oe_charge_engine.__name__} "
                    f"charge assignment for this molecule. More information "
                    f"is available at https://github.com/openforcefield/openff-toolkit/issues/346"
                )
                quacpac_status = oequacpac.OEAssignCharges(oemol, oe_charge_engine())

        if quacpac_status is False:
            raise ChargeCalculationError(
                f'Unable to assign charges: {errfs.str().decode("UTF-8")}'
            )

        # Extract and return charges
        # TODO: Make sure atom mapping remains constant
        # Extract the list of charges, taking into account possible indexing differences
        charges = Quantity(
            np.zeros(shape=oemol.NumAtoms(), dtype=np.float64), unit.elementary_charge
        )
        for oeatom in oemol.GetAtoms():
            index = oeatom.GetIdx()
            charge = oeatom.GetPartialCharge()
            charge = charge * unit.elementary_charge
            charges[index] = charge

        molecule.partial_charges = charges

        if normalize_partial_charges:
            molecule._normalize_partial_charges()

[docs]    def assign_fractional_bond_orders(
        self,
        molecule: "Molecule",
        bond_order_model: Optional[str] = None,
        use_conformers: Optional[list[Quantity]] = None,
        _cls=None,
    ):
        """
        Update and store list of bond orders this molecule. Bond orders are stored on each
        bond, in the `bond.fractional_bond_order` attribute.

        .. warning :: This API is experimental and subject to change.

        Parameters
        ----------
        molecule
            The molecule to assign wiberg bond orders to
        bond_order_model
            The charge model to use. One of ['am1-wiberg', 'am1-wiberg-elf10',
            'pm3-wiberg', 'pm3-wiberg-elf10']. If None, 'am1-wiberg' will be used.
        use_conformers
            The conformers to use for fractional bond order calculation. If None, an
            appropriate number of conformers will be generated by an available
            ToolkitWrapper. If the chosen ``bond_order_model`` is an ELF variant, the ELF
            conformer selection method will be applied to the provided conformers.
        _cls
            Molecule constructor
        """
        from openeye import oechem, oequacpac

        if _cls is None:
            from openff.toolkit.topology.molecule import Molecule

            _cls = Molecule

        # Make a copy since we'll be messing with this molecule's conformers
        temp_mol = _cls(molecule)

        if bond_order_model is None:
            bond_order_model = "am1-wiberg"

        bond_order_model = bond_order_model.lower()

        is_elf_method = bond_order_model in ["am1-wiberg-elf10", "pm3-wiberg-elf10"]

        if use_conformers is None:
            self.generate_conformers(
                molecule=temp_mol,
                n_conformers=1 if not is_elf_method else 500,
                # 0.05 is the recommended RMS when generating a 'Dense' amount of
                # conformers using Omega: https://docs.eyesopen.com/toolkits/python/
                # omegatk/OEConfGenConstants/OEFragBuilderMode.html.
                rms_cutoff=None if not is_elf_method else 0.05 * unit.angstrom,
                make_carboxylic_acids_cis=True,
            )
        else:
            temp_mol._conformers = None
            for conformer in use_conformers:
                temp_mol._add_conformer(conformer)
        if temp_mol.n_conformers == 0:
            raise ValueError(
                "No conformers present in molecule submitted for fractional bond order calculation. Consider "
                "loading the molecule from a file with geometry already present or running "
                "molecule.generate_conformers() before calling molecule.compute_wiberg_bond_orders()"
            )

        if is_elf_method:
            # Apply the ELF10 conformer selection method.
            temp_mol.apply_elf_conformer_selection()

        # Set the options to use when computing the WBOs. This is based on example at
        # https://docs.eyesopen.com/toolkits/python/quacpactk/examples_summary_wibergbondorders.html
        am1 = oequacpac.OEAM1()

        am1results = oequacpac.OEAM1Results()
        am1options = am1.GetOptions()

        if bond_order_model.startswith("am1-wiberg"):
            am1options.SetSemiMethod(oequacpac.OEMethodType_AM1)
        elif bond_order_model.startswith("pm3-wiberg"):
            # TODO: Make sure that modifying am1options actually works
            am1options.SetSemiMethod(oequacpac.OEMethodType_PM3)
        else:
            raise ValueError(
                f"Bond order model '{bond_order_model}' is not supported by "
                f"OpenEyeToolkitWrapper. Supported models are ['am1-wiberg', "
                f"'am1-wiberg-elf10', 'pm3-wiberg', 'pm3-wiberg-elf10']."
            )

        # Convert the conformers into OE friendly objects to make setting them one
        # at a time easier.
        oe_conformers = [
            oechem.OEFloatArray(conformer.m_as(unit.angstrom).flatten())
            for conformer in temp_mol.conformers
        ]

        oemol = self.to_openeye(temp_mol)
        bond_orders = defaultdict(list)

        for oe_conformer in oe_conformers:
            oemol.DeleteConfs()
            oemol.NewConf(oe_conformer)

            status = am1.CalcAM1(am1results, oemol)

            if status is False:
                raise OpenEyeError(
                    "Unable to assign charges (in the process of calculating "
                    "fractional bond orders)"
                )

            for bond in oemol.GetBonds():
                bond_orders[bond.GetIdx()].append(
                    am1results.GetBondOrder(bond.GetBgnIdx(), bond.GetEndIdx())
                )

        # TODO: Will bonds always map back to the same index? Consider doing a
        #       topology mapping.
        for bond_idx, conformer_bond_orders in bond_orders.items():
            # Get bond order
            order = np.mean(conformer_bond_orders)

            mol_bond = molecule._bonds[bond_idx]
            mol_bond.fractional_bond_order = order

[docs]    def get_tagged_smarts_connectivity(self, smarts: str) -> tuple[TTA, ...]:
        """
        Returns a tuple of tuples indicating connectivity between tagged atoms in a SMARTS string. Does not
        return bond order.

        Parameters
        ----------
        smarts
            The tagged SMARTS to analyze

        Returns
        -------
        unique_tags
            A sorted tuple of all unique tagged atom map indices.
        tagged_atom_connectivity
            A tuple of tuples, where each inner tuple is a pair of tagged atoms
            (tag_idx_1, tag_idx_2) which are bonded. The inner tuples are ordered
            smallest-to-largest, and the tuple of tuples is ordered lexically. The
            return value for an improper torsion would be ((1, 2), (2, 3), (2, 4)).

        Raises
        ------
        SMIRKSParsingError
            If OpenEye toolkit was unable to parse the provided smirks/tagged smarts
        """
        from openeye import oechem

        from openff.toolkit.utils.exceptions import SMIRKSParsingError

        qmol = oechem.OEQMol()
        status = oechem.OEParseSmarts(qmol, smarts)
        if not status:
            raise SMIRKSParsingError(
                f"OpenEye Toolkit was unable to parse SMIRKS {smarts}"
            )

        _unique_tags = set()
        _connections = set()
        for at1 in qmol.GetAtoms():
            if at1.GetMapIdx() == 0:
                continue
            _unique_tags.add(at1.GetMapIdx())
            for at2 in at1.GetAtoms():
                if at2.GetMapIdx() == 0:
                    continue
                cxn_to_add = sorted([at1.GetMapIdx(), at2.GetMapIdx()])
                _connections.add(tuple(cxn_to_add))
        connections: TTA = tuple(sorted(list(_connections)))
        unique_tags: TTA = tuple(sorted(list(_unique_tags)))
        return unique_tags, connections

    @staticmethod
    def _find_smarts_matches(
        oemol,
        smarts: str,
        aromaticity_model=DEFAULT_AROMATICITY_MODEL,
        unique: bool = False,
    ) -> list[tuple[int, ...]]:
        """Find all sets of atoms in the provided OpenEye molecule that match the provided SMARTS string.

        Parameters
        ----------
        oemol
            oemol to process with the SMIRKS in order to find matches
        smarts
            SMARTS string with any number of sequentially tagged atoms.
            If there are N tagged atoms numbered 1..N, the resulting matches will be N-tuples of
            atoms that match the corresponding tagged atoms.
        aromaticity_model
            The aromaticity model to use. Only OEAroModel_MDL is supported.
            Molecule is prepared with this aromaticity model prior to querying.
        unique
            If True, only return unique matches. If False, return all matches. This is passed to
            OpenEye's ``OESubSearch`` as ``uniquematch``.

        Returns
        -------
        matches
            matches[index] is an N-tuple of atom numbers from the ``oemol``
            Matches are returned in no guaranteed order.
            # TODO: What is returned if no matches are found? An empty list, or None?
            # TODO: Ensure that SMARTS numbers 1, 2, 3... are rendered into order of returned
            #    matches indexed by 0, 1, 2...

        .. notes ::

           * Raises ``LicenseError`` if valid OpenEye tools license is not found, rather than
               causing program to terminate
           * Raises ``ChemicalEnvironmentParsingError`` if ``smarts`` query is malformed

        """
        from openeye import oechem
        from openeye.oechem import OESubSearch

        from openff.toolkit.utils.exceptions import ChemicalEnvironmentParsingError

        # Make a copy of molecule so we don't influence original (probably safer than deepcopy per C Bayly)
        mol = oechem.OEMol(oemol)
        # Set up query
        qmol = oechem.OEQMol()
        if not oechem.OEParseSmarts(qmol, smarts):
            raise ChemicalEnvironmentParsingError(
                f'OpenEye could not parse the SMARTS/SMIRKS string "{smarts}"'
            )

        # OEPrepareSearch will clobber our desired aromaticity model if we don't sync up mol
        # and qmol ahead of time.

        oechem.OEClearAromaticFlags(qmol)

        if aromaticity_model == "OEAroModel_MDL":
            oechem.OEAssignAromaticFlags(qmol, oechem.OEAroModel_MDL)
        else:
            raise InvalidAromaticityModelError(
                f"Given aromaticity model {aromaticity_model} which is not in the set of allowed aromaticity models:  "
                f"{ALLOWED_AROMATICITY_MODELS}"
            )

        # oechem.OEAssignHybridization(mol)
        oechem.OEAssignHybridization(qmol)

        # Build list of matches
        # TODO: The MoleculeImage mapping should preserve ordering of template molecule for equivalent atoms
        #       and speed matching for larger molecules.
        substructure_search = OESubSearch(qmol)
        # TODO: max_matches = int(max_matches) if max_matches is not None else 0
        max_matches = 0
        substructure_search.SetMaxMatches(max_matches)
        oechem.OEPrepareSearch(mol, substructure_search)
        matches = list()
        for match in substructure_search.Match(mol, unique):
            # Compile list of atom indices that match the pattern tags
            atom_indices: dict[int, int] = dict()
            for matched_atom in match.GetAtoms():
                if matched_atom.pattern.GetMapIdx() != 0:
                    atom_indices[matched_atom.pattern.GetMapIdx() - 1] = (
                        matched_atom.target.GetIdx()
                    )

            # Compress into tuple
            matches.append(
                tuple(atom_indices[index] for index in range(len(atom_indices)))
            )

        return matches

[docs]    def find_smarts_matches(
        self,
        molecule: "Molecule",
        smarts: str,
        aromaticity_model=DEFAULT_AROMATICITY_MODEL,
        unique=False,
    ) -> list[tuple[int, ...]]:
        """
        Find all SMARTS matches for the specified molecule, using the specified aromaticity model.

        .. warning :: This API is experimental and subject to change.

        Parameters
        ----------
        molecule
            The molecule for which all specified SMARTS matches are to be located
        smarts
            SMARTS string with optional SMIRKS-style atom tagging
        aromaticity_model
            The aromaticity model to use. Only OEAroModel_MDL is supported.
        unique
            If True, only return unique matches. If False, return all matches.

        .. note :: Currently, the only supported ``aromaticity_model`` is ``OEAroModel_MDL``

        """
        oemol, _ = self._connection_table_to_openeye(molecule)
        return self._find_smarts_matches(
            oemol,
            smarts,
            aromaticity_model=aromaticity_model,
            unique=unique,
        )


def requires_openeye_module(module_name):
    def inner_decorator(function):
        @wraps(function)
        def wrapper(*args, **kwargs):
            try:
                module = importlib.import_module("openeye." + module_name)
            except ImportError:
                raise OpenEyeImportError("openeye." + module_name)
            try:
                license_func = OpenEyeToolkitWrapper._license_functions[module_name]
            except KeyError:
                raise OpenEyeImportError(f"we do not currently use {module_name}")

            # TODO: Custom exception
            assert getattr(module, license_func)()

            return function(*args, **kwargs)

        return wrapper

    return inner_decorator