"""
Components to expand stereochemistry and tautomeric states of molecules.
"""
from typing import List, Optional, Tuple
from openff.toolkit.topology import Molecule
from openff.toolkit.utils import ToolkitRegistry
from qcelemental.util import which_import
from typing_extensions import Literal
from openff.qcsubmit._pydantic import Field
from openff.qcsubmit.common_structures import ComponentProperties
from openff.qcsubmit.utils import (
check_missing_stereo,
get_symmetry_classes,
get_symmetry_group,
)
from openff.qcsubmit.workflow_components.base_component import (
CustomWorkflowComponent,
ToolkitValidator,
)
from openff.qcsubmit.workflow_components.utils import (
ComponentResult,
ImproperScan,
Scan1D,
Scan2D,
TorsionIndexer,
)
[docs]class EnumerateTautomers(ToolkitValidator, CustomWorkflowComponent):
"""
Enumerate the tautomers of a molecule using the backend toolkits through the OFFTK.
"""
type: Literal["EnumerateTautomers"] = "EnumerateTautomers"
# custom settings for the class
max_tautomers: int = Field(
20, description="The maximum number of tautomers that should be generated."
)
[docs] @classmethod
def description(cls) -> str:
return "Enumerate the tautomers of a molecule if possible, returning the input plus any new molecules."
[docs] @classmethod
def fail_reason(cls) -> str:
return "The molecule tautomers could not be enumerated."
[docs] @classmethod
def properties(cls) -> ComponentProperties:
return ComponentProperties(process_parallel=True, produces_duplicates=True)
def _apply(
self, molecules: List[Molecule], toolkit_registry: ToolkitRegistry
) -> ComponentResult:
"""
Enumerate tautomers of the input molecule.
The input molecules tautomers are enumerated using the desired backend toolkit and are returned along with the input molecule.
Parameters:
molecules: The list of molecules the component should be applied on.
toolkit_registry: The openff.toolkit.utils.ToolkitRegistry which declares the available toolkits.
Returns:
A [ComponentResult][qcsubmit.datasets.ComponentResult] instance containing information about the molecules
that passed and were filtered by the component and details about the component which generated the result.
"""
result = self._create_result(toolkit_registry=toolkit_registry)
for molecule in molecules:
try:
tautomers = molecule.enumerate_tautomers(
max_states=self.max_tautomers, toolkit_registry=toolkit_registry
)
for tautomer in tautomers:
result.add_molecule(tautomer)
# add the input molecule
result.add_molecule(molecule=molecule)
except Exception:
result.filter_molecule(molecule)
return result
[docs]class EnumerateStereoisomers(ToolkitValidator, CustomWorkflowComponent):
"""
Enumerate the stereo centers and bonds of a molecule using the backend toolkits through the OFFTK, only well defined
molecules are returned by this component, this is check via a OFFTK round trip.
"""
type: Literal["EnumerateStereoisomers"] = "EnumerateStereoisomers"
undefined_only: bool = Field(
False,
description="If we should only enumerate parts of the molecule with undefined stereochemistry or all stereochemistry.",
)
max_isomers: int = Field(
20, description="The maximum number of stereoisomers to be generated."
)
rationalise: bool = Field(
True,
description="If we should check that the resulting molecules are physically possible by attempting to generate conformers for them.",
)
[docs] @classmethod
def description(cls) -> str:
return "Enumerate the stereo centers and bonds of the molecule, returing the input molecule if valid and any new molecules."
[docs] @classmethod
def fail_reason(cls) -> str:
return "The molecules stereo centers or bonds could not be enumerated."
[docs] @classmethod
def properties(cls) -> ComponentProperties:
return ComponentProperties(process_parallel=True, produces_duplicates=True)
def _apply(
self, molecules: List[Molecule], toolkit_registry: ToolkitRegistry
) -> ComponentResult:
"""
Enumerate stereo centers and bonds of the input molecule
Parameters:
molecules: The list of molecules the component should be applied on.
toolkit_registry: The openff.toolkit.utils.ToolkitRegistry which declares the available toolkits.
Returns:
A [ComponentResult][qcsubmit.datasets.ComponentResult] instance containing information about the molecules
that passed and were filtered by the component and details about the component which generated the result.
"""
result = self._create_result(toolkit_registry=toolkit_registry)
for molecule in molecules:
try:
isomers = molecule.enumerate_stereoisomers(
undefined_only=self.undefined_only,
max_isomers=self.max_isomers,
rationalise=self.rationalise,
toolkit_registry=toolkit_registry,
)
# now check that each molecule is well defined
for isomer in isomers:
if not check_missing_stereo(isomer):
result.add_molecule(isomer)
# now check the input
# rationalise if needed
if self.rationalise:
molecule.generate_conformers(n_conformers=1)
if not check_missing_stereo(molecule):
result.add_molecule(molecule)
except Exception:
result.filter_molecule(molecule)
return result
[docs]class EnumerateProtomers(ToolkitValidator, CustomWorkflowComponent):
"""
Enumerate the formal charges of the input molecule using the backend toolkits through the OFFTK.
Important:
Only Openeye is supported so far.
"""
type: Literal["EnumerateProtomers"] = "EnumerateProtomers"
max_states: int = Field(
10, description="The maximum number of states that should be generated."
)
[docs] @classmethod
def is_available(cls) -> bool:
tk = super().is_available()
oe = which_import(
".oechem",
return_bool=True,
raise_error=True,
package="openeye",
raise_msg="Please install via `conda install openeye-toolkits -c openeye`",
)
return tk and oe
[docs] @classmethod
def description(cls) -> str:
return "Enumerate the protomers of the molecule, returning the input molecule and any new molecules."
[docs] @classmethod
def fail_reason(cls) -> str:
return "The molecules formal charges could not be enumerated possibly due to a missing toolkit."
[docs] @classmethod
def properties(cls) -> ComponentProperties:
return ComponentProperties(process_parallel=True, produces_duplicates=True)
def _apply(
self, molecules: List[Molecule], toolkit_registry: ToolkitRegistry
) -> ComponentResult:
"""
Enumerate the formal charges of the molecule.
Parameters:
molecules: The list of molecules the component should be applied on.
toolkit_registry: The openff.toolkit.utils.ToolkitRegistry which declares the avilable toolkits.
Returns:
A [ComponentResult][qcsubmit.datasets.ComponentResult] instance containing information about the molecules
that passed and were filtered by the component and details about the component which generated the result.
Important:
This is only possible using Openeye so far, if openeye is not available this step will fail.
"""
result = self._create_result(toolkit_registry=toolkit_registry)
for molecule in molecules:
try:
protomers = molecule.enumerate_protomers(max_states=self.max_states)
for protomer in protomers:
result.add_molecule(protomer)
result.add_molecule(molecule)
except Exception:
# if openeye is not available this will fail
result.filter_molecule(molecule)
return result
[docs]class ScanEnumerator(ToolkitValidator, CustomWorkflowComponent):
"""
This module will tag any matching substructures for scanning, useful for torsiondrive datasets.
"""
type: Literal["ScanEnumerator"] = "ScanEnumerator"
torsion_scans: List[Scan1D] = Field(
[],
description="A list of scan objects which describes the scan range and scan increment"
"that should be used with the associated smarts pattern.",
)
double_torsion_scans: List[Scan2D] = Field(
[],
description="A list of double scan objects which describes the scan ranges and scan increments,"
"that should be used with each of the smarts patterns.",
)
improper_scans: List[ImproperScan] = Field(
[],
description="A list of improper scan objects which describes the scan range and scan increment"
"that should be used with the smarts pattern.",
)
[docs] @classmethod
def description(cls) -> str:
return "Tag any matched substructures for scanning."
[docs] @classmethod
def fail_reason(cls) -> str:
return "The molecule contained no substructure matches."
[docs] @classmethod
def properties(cls) -> ComponentProperties:
return ComponentProperties(process_parallel=True, produces_duplicates=False)
[docs] def add_torsion_scan(
self,
smarts: str,
scan_rage: Optional[Tuple[int, int]] = None,
scan_increment: int = 15,
) -> None:
"""
Add a targeted 1D torsion scan to the scan enumerator.
Args:
smarts:
The numerically tagged SMARTs pattern that should be used to identify the torsion atoms.
scan_rage:
The angle in degrees the torsion should be scanned between, from low to high
scan_increment:
The value in degrees between each grid point in the scan.
"""
self.torsion_scans.append(
Scan1D(
smarts1=smarts, scan_range1=scan_rage, scan_increment=[scan_increment]
)
)
[docs] def add_double_torsion(
self,
smarts1: str,
smarts2: str,
scan_range1: Optional[Tuple[int, int]] = None,
scan_range2: Optional[Tuple[int, int]] = None,
scan_increments: List[int] = (15, 15),
) -> None:
"""
Add a targeted 2D torsion scan to the scan enumerator.
Args:
smarts1:
The numerically tagged SMARTs pattern that should be used to identify the first torsion atoms.
smarts2:
The numerically tagged SMARTs pattern that should be used to identify the second torsion atoms.
scan_range1:
The angle in degrees the first torsion should be scanned between, from low to high
scan_range2:
The angle in degrees the second torsion should be scanned between, from low to high
scan_increments:
A list of the values in degrees between each grid point in the scans.
"""
self.double_torsion_scans.append(
Scan2D(
smarts1=smarts1,
smarts2=smarts2,
scan_range1=scan_range1,
scan_range2=scan_range2,
scan_increment=scan_increments,
)
)
[docs] def add_improper_torsion(
self,
smarts: str,
central_smarts: str,
scan_range: Optional[Tuple[int, int]] = None,
scan_increment: int = 15,
) -> None:
"""
Add a targeted Improper torsion to the scan enumerator.
Args:
smarts:
The numerically tagged SMARTs pattern which describes the entire improper.
central_smarts:
The numerically tagged SMARTs pattern which identifies the central atom in the improper.
scan_range:
The angles in degrees the improper should be scanned between, from low to high.
scan_increment:
The value in degrees between each grid point in the scan.
"""
self.improper_scans.append(
ImproperScan(
smarts=smarts,
central_smarts=central_smarts,
scan_range=scan_range,
scan_increment=[scan_increment],
)
)
def _get_unique_torsions(
self, matches: List[Tuple[int, int, int, int]], symmetry_classes: List[int]
) -> List[Tuple[int, int, int, int]]:
"""
Use the symmetry classes to condense the matches into unique torsions in the molecule by symmetry.
"""
torsions_by_symmetry = {
tuple(sorted(symmetry_classes[idx] for idx in match[1:3])): match
for match in matches
}
unique_torsions = [*torsions_by_symmetry.values()]
return unique_torsions
def _apply(
self, molecules: List[Molecule], toolkit_registry: ToolkitRegistry
) -> ComponentResult:
"""
Tag any dihedrals which match the defined set of targets in the enumerator.
"""
result = self._create_result(toolkit_registry=toolkit_registry)
for molecule in molecules:
symmetry_classes = get_symmetry_classes(molecule)
molecule.properties["dihedrals"] = TorsionIndexer()
self._tag_torsions(
molecule, symmetry_classes, toolkit_registry=toolkit_registry
)
self._tag_double_torsions(
molecule, symmetry_classes, toolkit_registry=toolkit_registry
)
self._tag_improper_torsions(
molecule, symmetry_classes, toolkit_registry=toolkit_registry
)
indexer = molecule.properties["dihedrals"]
if len(indexer.get_dihedrals) == 0:
result.filter_molecule(molecule)
result.add_molecule(molecule)
return result
def _tag_torsions(
self,
molecule: Molecule,
symmetry_classes: List[int],
toolkit_registry: ToolkitRegistry,
) -> None:
"""
For each of the torsions in the torsion list try and tag only one in the molecule.
"""
indexer: TorsionIndexer = molecule.properties["dihedrals"]
for torsion in self.torsion_scans:
matches = molecule.chemical_environment_matches(
query=torsion.smarts1, toolkit_registry=toolkit_registry
)
unique_torsions = self._get_unique_torsions(
matches=matches, symmetry_classes=symmetry_classes
)
for tagged_torsion in unique_torsions:
indexer.add_torsion(
torsion=tagged_torsion,
scan_range=torsion.scan_range1,
scan_increment=torsion.scan_increment,
symmetry_group=get_symmetry_group(
atom_group=tagged_torsion[1:3],
symmetry_classes=symmetry_classes,
),
)
def _tag_double_torsions(
self,
molecule: Molecule,
symmetry_classes: List[int],
toolkit_registry: ToolkitRegistry,
) -> None:
"""
For each double torsion in the list try and tag the combination in the molecule.
"""
indexer: TorsionIndexer = molecule.properties["dihedrals"]
for double_torsion in self.double_torsion_scans:
matches1 = molecule.chemical_environment_matches(
query=double_torsion.smarts1, toolkit_registry=toolkit_registry
)
matches2 = molecule.chemical_environment_matches(
query=double_torsion.smarts2, toolkit_registry=toolkit_registry
)
unique_torsions1 = self._get_unique_torsions(
matches=matches1, symmetry_classes=symmetry_classes
)
unique_torsions2 = self._get_unique_torsions(
matches=matches2, symmetry_classes=symmetry_classes
)
for tagged_torsion1 in unique_torsions1:
symmetry_group1 = get_symmetry_group(
atom_group=tagged_torsion1[1:3], symmetry_classes=symmetry_classes
)
for tagged_torsion2 in unique_torsions2:
symmetry_group2 = get_symmetry_group(
atom_group=tagged_torsion2[1:3],
symmetry_classes=symmetry_classes,
)
indexer.add_double_torsion(
torsion1=tagged_torsion1,
torsion2=tagged_torsion2,
symmetry_group1=symmetry_group1,
symmetry_group2=symmetry_group2,
scan_range1=double_torsion.scan_range1,
scan_range2=double_torsion.scan_range2,
scan_increment=double_torsion.scan_increment,
)
def _tag_improper_torsions(
self,
molecule: Molecule,
symmetry_classes: List[int],
toolkit_registry: ToolkitRegistry,
) -> None:
"""
For each improper torsion in the list try and tag the combination in the molecule.
"""
indexer: TorsionIndexer = molecule.properties["dihedrals"]
for improper in self.improper_scans:
matches = molecule.chemical_environment_matches(
query=improper.smarts, toolkit_registry=toolkit_registry
)
unique_torsions = self._get_unique_torsions(
matches=matches, symmetry_classes=symmetry_classes
)
central_atoms = molecule.chemical_environment_matches(
improper.central_smarts
)
for tagged_torsion in unique_torsions:
symmetry_group = get_symmetry_group(
atom_group=tagged_torsion, symmetry_classes=symmetry_classes
)
for atom in central_atoms:
if atom[0] in tagged_torsion:
indexer.add_improper(
central_atom=atom[0],
improper=tagged_torsion,
symmetry_group=symmetry_group,
scan_range=improper.scan_range,
scan_increment=improper.scan_increment,
)
break