Source code for openff.nagl.toolkits.openeye
import copy
from typing import Tuple, TYPE_CHECKING, List, Union
import numpy as np
from openff.units import unit
from openff.nagl.toolkits._base import NAGLToolkitWrapperBase
from openff.toolkit.utils.openeye_wrapper import OpenEyeToolkitWrapper
from openff.nagl.utils._types import HybridizationType
if TYPE_CHECKING:
from openff.toolkit.topology import Molecule
[docs]class NAGLOpenEyeToolkitWrapper(NAGLToolkitWrapperBase, OpenEyeToolkitWrapper):
name = "openeye"
def _run_normalization_reactions(
self,
molecule: "Molecule",
normalization_reactions: Tuple[str, ...] = tuple(),
**kwargs,
):
"""
Normalize the bond orders and charges of a molecule by applying a series of transformations to it.
Parameters
----------
molecule: openff.toolkit.topology.Molecule
The molecule to normalize
normalization_reactions: Tuple[str, ...], default=tuple()
A tuple of SMARTS reaction strings representing the reactions to apply to the molecule.
Returns
-------
normalized_molecule: openff.toolkit.topology.Molecule
The normalized molecule. This is a new molecule object, not the same as the input molecule.
"""
from openeye import oechem
oemol = self.to_openeye(molecule=molecule)
for reaction_smarts in normalization_reactions:
reaction = oechem.OEUniMolecularRxn(reaction_smarts)
reaction(oemol)
molecule = self.from_openeye(
oemol,
allow_undefined_stereo=True,
_cls=molecule.__class__,
)
return molecule
[docs] def get_molecule_hybridizations(
self, molecule: "Molecule"
) -> List[HybridizationType]:
"""
Get the hybridization of each atom in a molecule.
Parameters
----------
molecule: openff.toolkit.topology.Molecule
The molecule to get the hybridizations of.
Returns
-------
hybridizations: List[HybridizationType]
The hybridization of each atom in the molecule.
"""
from openeye import oechem
conversions = {
oechem.OEHybridization_Unknown: HybridizationType.OTHER,
oechem.OEHybridization_sp: HybridizationType.SP,
oechem.OEHybridization_sp2: HybridizationType.SP2,
oechem.OEHybridization_sp3: HybridizationType.SP3,
oechem.OEHybridization_sp3d: HybridizationType.SP3D,
oechem.OEHybridization_sp3d2: HybridizationType.SP3D2,
}
hybridizations = []
oemol = self.to_openeye(molecule=molecule)
oechem.OEAssignHybridization(oemol)
for atom in oemol.GetAtoms():
hybridization = atom.GetHyb()
try:
hybridizations.append(conversions[hybridization])
except KeyError:
raise ValueError(f"Unknown hybridization {hybridization}")
return hybridizations
def _molecule_from_openeye(
self,
oemol,
as_smiles: bool = False,
mapped_smiles: bool = False,
):
"""
Create a Molecule from an OpenEye OEMol with charges
Parameters
----------
oemol: openeye.oechem.OEMol
The molecule to create a Molecule from
as_smiles: bool, default=False
If True, return a SMILES string instead of an OpenFF Molecule
mapped_smiles: bool, default=False
If True, return a SMILES string with atom indices as atom map numbers.
Returns
-------
molecule: openff.toolkit.topology.Molecule or str
"""
from openff.toolkit.topology import Molecule
has_charges = (
OpenEyeToolkitWrapper._turn_oemolbase_sd_charges_into_partial_charges(oemol)
)
offmol = self.from_openeye(
oemol,
allow_undefined_stereo=True,
_cls=Molecule,
)
if not has_charges:
offmol.partial_charges = None
if as_smiles:
return offmol.to_smiles(mapped=mapped_smiles)
return offmol
[docs] def stream_molecules_from_sdf_file(
self,
file: str,
as_smiles: bool = False,
mapped_smiles: bool = False,
include_sdf_data: bool = True,
**kwargs,
):
"""
Stream molecules from an SDF file.
Parameters
----------
file: str
The path to the SDF file to stream molecules from.
as_smiles: bool, default=False
If True, return a SMILES string instead of an OpenFF Molecule
mapped_smiles: bool, default=False
If True, return a SMILES string with atom indices as atom map numbers.
include_sdf_data: bool, default=True
If True, include the SDF tag data in the output molecules.
Returns
-------
molecules: Generator[openff.toolkit.topology.Molecule or str]
"""
from openeye import oechem
stream = oechem.oemolistream()
stream.open(file)
is_sdf = stream.GetFormat() == oechem.OEFormat_SDF
for oemol in stream.GetOEMols():
if is_sdf and hasattr(oemol, "GetConfIter"):
for conf in oemol.GetConfIter():
confmol = conf.GetMCMol()
if include_sdf_data and not as_smiles:
for dp in oechem.OEGetSDDataPairs(oemol):
oechem.OESetSDData(confmol, dp.GetTag(), dp.GetValue())
for dp in oechem.OEGetSDDataPairs(conf):
oechem.OESetSDData(confmol, dp.GetTag(), dp.GetValue())
else:
confmol = oemol
yield self._molecule_from_openeye(
confmol, as_smiles=as_smiles, mapped_smiles=mapped_smiles
)
[docs] def to_openeye(self, molecule: "Molecule"):
"""
Convert an OpenFF Molecule to an OpenEye OEMol with charges
stored as SD data.
Parameters
----------
molecule: openff.toolkit.topology.Molecule
The molecule to convert
Returns
-------
oemol: openeye.oechem.OEMol
"""
from openeye import oechem
oemol = super().to_openeye(molecule)
if molecule.partial_charges is not None:
partial_charges_list = [
oeatom.GetPartialCharge() for oeatom in oemol.GetAtoms()
]
partial_charges_str = " ".join([f"{val:f}" for val in partial_charges_list])
oechem.OESetSDData(oemol, "atom.dprop.PartialCharge", partial_charges_str)
return oemol
[docs] def stream_molecules_to_file(self, file: str):
"""
Stream molecules to an SDF file using a context manager.
Parameters
----------
file: str
The path to the SDF file to stream molecules to.
Examples
--------
>>> from openff.toolkit.topology import Molecule
>>> from openff.toolkit.utils.toolkits import OpenEyeToolkitWrapper
>>> toolkit_wrapper = OpenEyeToolkitWrapper()
>>> molecule1 = Molecule.from_smiles("CCO")
>>> molecule2 = Molecule.from_smiles("CCC")
>>> with toolkit_wrapper.stream_molecules_to_file("molecules.sdf") as writer:
... writer(molecule1)
... writer(molecule2)
"""
from openeye import oechem
from openff.toolkit.topology import Molecule
stream = oechem.oemolostream(file)
def writer(molecule: Molecule):
oechem.OEWriteMolecule(stream, self.to_openeye(molecule))
yield writer
stream.close()
[docs] def get_best_rmsd(
self,
molecule: "Molecule",
reference_conformer: Union[np.ndarray, unit.Quantity],
target_conformer: Union[np.ndarray, unit.Quantity],
) -> unit.Quantity:
"""
Compute the lowest all-atom RMSD between a reference and target conformer,
allowing for symmetry-equivalent atoms to be permuted.
Parameters
----------
molecule: openff.toolkit.topology.Molecule
The molecule to compute the RMSD for
reference_conformer: np.ndarray or openff.units.unit.Quantity
The reference conformer to compare to the target conformer.
If a numpy array, it is assumed to be in units of angstrom.
target_conformer: np.ndarray or openff.units.unit.Quantity
The target conformer to compare to the reference conformer.
If a numpy array, it is assumed to be in units of angstrom.
Returns
-------
rmsd: unit.Quantity
Examples
--------
>>> from openff.units import unit
>>> from openff.toolkit.topology import Molecule
>>> from openff.toolkit.utils.toolkits import OpenEyeToolkitWrapper
>>> toolkit_wrapper = OpenEyeToolkitWrapper()
>>> molecule = Molecule.from_smiles("CCCCO")
>>> molecule.generate_conformers(n_conformers=2)
>>> rmsd = toolkit_wrapper.get_best_rmsd(molecule, molecule.conformers[0], molecule.conformers[1])
>>> print(f"RMSD in angstrom: {rmsd.m_as(unit.angstrom)}")
"""
from openeye import oechem
if not isinstance(reference_conformer, unit.Quantity):
reference_conformer = reference_conformer * unit.angstrom
if not isinstance(target_conformer, unit.Quantity):
target_conformer = target_conformer * unit.angstrom
mol1 = copy.deepcopy(molecule)
mol1._conformers = [reference_conformer]
mol2 = copy.deepcopy(molecule)
mol2._conformers = [target_conformer]
oemol1 = self.to_openeye(mol1)
oemol2 = self.to_openeye(mol2)
# OERMSD(OEMolBase ref, OEMolBase fit, bool automorph=True, bool heavyOnly=True, bool overlay=False, double * rot=None, double * trans=None) -> double
rmsd = oechem.OERMSD(oemol1, oemol2, True, False, True)
return rmsd * unit.angstrom
[docs] def get_atoms_are_in_ring_size(
self,
molecule: "Molecule",
ring_size: int,
) -> List[bool]:
"""
Determine whether each atom in a molecule is in a ring of a given size.
Parameters
----------
molecule: openff.toolkit.topology.Molecule
The molecule to compute ring perception for
ring_size: int
The size of the ring to check for.
Returns
-------
in_ring_size: List[bool]
"""
from openeye import oechem
oemol = self.to_openeye(molecule)
oechem.OEFindRingAtomsAndBonds(oemol)
in_ring_size = [
oechem.OEAtomIsInRingSize(atom, ring_size) for atom in oemol.GetAtoms()
]
return in_ring_size
[docs] def get_bonds_are_in_ring_size(
self,
molecule: "Molecule",
ring_size: int,
) -> List[bool]:
"""
Determine whether each bond in a molecule is in a ring of a given size.
Parameters
----------
molecule: openff.toolkit.topology.Molecule
The molecule to compute ring perception for
ring_size: int
The size of the ring to check for.
Returns
-------
in_ring_size: List[bool]
Bonds are in the same order as the molecule's ``bonds`` attribute.
"""
from openeye import oechem
oemol = self.to_openeye(molecule)
oechem.OEFindRingAtomsAndBonds(oemol)
is_in_ring_size = [None] * len(molecule.bonds)
for oebond in oemol.GetBonds():
oe_i = oebond.GetBgnIdx()
oe_j = oebond.GetEndIdx()
off_bond = molecule.get_bond_between(oe_i, oe_j)
bond_index = off_bond.molecule_bond_index
is_in_ring_size[bond_index] = oechem.OEBondIsInRingSize(oebond, ring_size)
return is_in_ring_size
# TODO: this only outputs 0 or 1.
# def calculate_circular_fingerprint_similarity(
# self,
# molecule: "Molecule",
# reference_molecule: "Molecule",
# radius: int = 3,
# nbits: int = 2048,
# ) -> float:
# """
# Compute the similarity between two molecules using a fingerprinting method.
# Uses a Morgan fingerprint with RDKit and a Circular fingerprint with OpenEye.
# Parameters
# ----------
# molecule: openff.toolkit.topology.Molecule
# The molecule to compute the fingerprint for.
# reference_molecule: openff.toolkit.topology.Molecule
# The molecule to compute the fingerprint for.
# radius: int, default 3
# The radius of the fingerprint to use.
# nbits: int, default 2048
# The length of the fingerprint to use. Not used in RDKit.
# Returns
# -------
# similarity: float
# The Dice similarity between the two molecules.
# """
# from openeye import oegraphsim
# oegraphsim.OEFPBondType_DefaultCircularBond
# # Connectivity: (Element, #heavy neighbors, #Hs, charge, isotope, inRing
# # Donor, Acceptor, Aromatic, Halogen, Basic, Acidic
# atypes = (
# oegraphsim.OEFPAtomType_AtomicNumber
# | oegraphsim.OEFPAtomType_HvyDegree
# | oegraphsim.OEFPAtomType_HCount
# | oegraphsim.OEFPAtomType_FormalCharge
# | oegraphsim.OEFPAtomType_InRing
# | oegraphsim.OEFPAtomType_Chiral
# | oegraphsim.OEFPAtomType_EqHBondDonor
# | oegraphsim.OEFPAtomType_EqHBondAcceptor
# | oegraphsim.OEFPAtomType_EqAromatic
# | oegraphsim.OEFPAtomType_EqHalogen
# )
# btypes = oegraphsim.OEFPBondType_BondOrder
# oemol1 = self.to_openeye(molecule)
# oemol2 = self.to_openeye(reference_molecule)
# fp1 = oegraphsim.OEFingerPrint()
# oegraphsim.OEMakeCircularFP(fp1, oemol1, nbits, radius, radius, atypes, btypes)
# fp2 = oegraphsim.OEFingerPrint()
# oegraphsim.OEMakeCircularFP(fp2, oemol2, nbits, radius, radius, atypes, btypes)
# similarity = oegraphsim.OEDice(fp1, fp2)
# return similarity
# import contextlib
# from openff.utilities import requires_package
# @contextlib.contextmanager
# @requires_package("openeye.oechem")
# def capture_oechem_warnings(): # pragma: no cover
# from openeye import oechem
# output_stream = oechem.oeosstream()
# oechem.OEThrow.SetOutputStream(output_stream)
# oechem.OEThrow.Clear()
# yield
# oechem.OEThrow.SetOutputStream(oechem.oeerr)
# @requires_package("openeye.oechem")
# def _normalize_molecule_oe(
# molecule: "Molecule", reaction_smarts: List[str]
# ) -> "Molecule": # pragma: no cover
# from openeye import oechem
# from openff.toolkit.topology import Molecule
# oe_molecule: oechem.OEMol = molecule.to_openeye()
# for pattern in reaction_smarts:
# reaction = oechem.OEUniMolecularRxn(pattern)
# reaction(oe_molecule)
# return Molecule.from_openeye(oe_molecule, allow_undefined_stereo=True)