Source code for openff.qcsubmit.workflow_components.fragmentation

"""
Components that aid with Fragmentation of molecules.
"""

from typing import TYPE_CHECKING, Dict, List, Optional

from openff.toolkit.topology import Molecule
from openff.toolkit.utils import ToolkitRegistry
from qcelemental.util import which_import
from typing_extensions import Literal

from openff.qcsubmit._pydantic import Field, validator
from openff.qcsubmit.common_structures import ComponentProperties
from openff.qcsubmit.utils import get_symmetry_classes, get_symmetry_group, get_torsion
from openff.qcsubmit.validators import check_environments
from openff.qcsubmit.workflow_components.base_component import (
    CustomWorkflowComponent,
    ToolkitValidator,
)
from openff.qcsubmit.workflow_components.utils import ComponentResult, TorsionIndexer

if TYPE_CHECKING:
    from openff.fragmenter.fragment import FragmentationResult


class FragmenterBase(ToolkitValidator, CustomWorkflowComponent):
    """A common base fragmenter class which handles tagging the targeted bond for torsion driving."""

    type: Literal["FragmenterBase"]

    target_torsion_smarts: Optional[List[str]] = Field(
        None,
        description="The list of SMARTS patterns used to identify central target bonds to fragment around. By default this is any single non-termial bond.",
    )

    _check_smarts = validator(
        "target_torsion_smarts",
        each_item=True,
        allow_reuse=True,
    )(check_environments)

    @classmethod
    def fail_reason(cls) -> str:
        return "The molecule could not be fragmented correctly."

    @classmethod
    def properties(cls) -> ComponentProperties:
        return ComponentProperties(process_parallel=True, produces_duplicates=True)

    @classmethod
    def is_available(cls) -> bool:
        """
        Check if fragmenter can be imported.
        """
        toolkit = which_import(
            ".toolkit",
            raise_error=True,
            return_bool=True,
            package="openff",
            raise_msg="Please install via `conda install openff-toolkit -c conda-forge`.",
        )
        fragmenter = which_import(
            ".fragmenter",
            raise_error=True,
            return_bool=True,
            package="openff",
            raise_msg="Please install via `pip install git+https://github.com/openforcefield/fragmenter.git@master`.",
        )

        return toolkit and fragmenter

    def provenance(self, toolkit_registry: ToolkitRegistry) -> Dict:
        """
        Collect the toolkit information and add the fragmenter version information.
        """

        from openff import fragmenter

        provenance = super().provenance(toolkit_registry=toolkit_registry)

        provenance["openff-fragmenter"] = fragmenter.__version__

        return provenance

    @classmethod
    def _process_fragments(
        cls, fragments: "FragmentationResult", component_result: ComponentResult
    ):
        """Process the resulting fragments and tag the targeted bonds ready for torsiondrives."""
        from openff.fragmenter.utils import get_atom_index

        for bond_map, fragment in fragments.fragments_by_bond.items():
            fragment_mol = fragment.molecule
            # get the index of the atoms in the fragment
            atom1, atom2 = get_atom_index(fragment_mol, bond_map[0]), get_atom_index(
                fragment_mol, bond_map[1]
            )
            bond = fragment_mol.get_bond_between(atom1, atom2)
            symmetry_classes = get_symmetry_classes(fragment_mol)
            symmetry_group = get_symmetry_group(
                atom_group=(bond.atom1_index, bond.atom2_index),
                symmetry_classes=symmetry_classes,
            )
            torsion = get_torsion(bond)
            torsion_tag = TorsionIndexer()
            torsion_tag.add_torsion(torsion=torsion, symmetry_group=symmetry_group)
            fragment_mol.properties["dihedrals"] = torsion_tag
            del fragment_mol.properties["atom_map"]
            component_result.add_molecule(fragment_mol)


[docs]class WBOFragmenter(FragmenterBase):
    """
    Fragment molecules using the WBO fragmenter class of the fragmenter module.
    For more information see <https://github.com/openforcefield/fragmenter>.
    """

    type: Literal["WBOFragmenter"] = "WBOFragmenter"
    threshold: float = Field(
        0.03,
        description="The WBO error threshold between the parent and the fragment value, the fragmentation will stop when the difference between the fragment and parent is less than this value.",
    )
    keep_non_rotor_ring_substituents: bool = Field(
        False,
        description="If any non rotor ring substituents should be kept during the fragmentation resulting in smaller fragments when `False`.",
    )
    heuristic: Literal["path_length", "wbo"] = Field(
        "path_length",
        description="The path fragmenter should take when fragment needs to be grown "
        "out. The options are ``['wbo', 'path_length']``.",
    )

[docs]    @classmethod
    def description(cls) -> str:
        return (
            "Fragment a molecule across all rotatable bonds using the WBO fragmenter."
        )

    def _apply(
        self, molecules: List[Molecule], toolkit_registry: ToolkitRegistry
    ) -> ComponentResult:
        """
        Fragment the molecules using the WBOFragmenter.

        Args:
            molecules: The list of molecules which should be processed by this component.
            toolkit_registry: The openff.toolkit.utils.ToolkitRegistry which declares the available toolkits.

        Note:
            * If the input molecule fails fragmentation it will fail this component and be removed.
            * When a molecule can not be fragmented to meet the wbo threshold the parent is likely to be included in the
            dataset.
        """
        from openff.fragmenter.fragment import WBOFragmenter

        result = self._create_result(toolkit_registry=toolkit_registry)

        for molecule in molecules:
            fragment_factory = WBOFragmenter(
                threshold=self.threshold,
                keep_non_rotor_ring_substituents=self.keep_non_rotor_ring_substituents,
                heuristic=self.heuristic,
            )

            try:
                fragment_result = fragment_factory.fragment(
                    molecule=molecule,
                    toolkit_registry=toolkit_registry,
                    target_bond_smarts=self.target_torsion_smarts,
                )
                self._process_fragments(
                    fragments=fragment_result, component_result=result
                )

            except (RuntimeError, ValueError):
                # this will catch cmiles errors for molecules with undefined stero
                result.filter_molecule(molecule)

        return result


[docs]class PfizerFragmenter(FragmenterBase):
    """The openff.fragmenter implementation of the Pfizer fragmenation method as described here
    (doi: 10.1021/acs.jcim.9b00373)
    """

    type: Literal["PfizerFragmenter"] = "PfizerFragmenter"

[docs]    @classmethod
    def description(cls) -> str:
        return "Fragment a molecule across all rotatable bonds using the Pfizer fragmentation scheme."

    def _apply(
        self, molecules: List[Molecule], toolkit_registry: ToolkitRegistry
    ) -> ComponentResult:
        """
        Fragment the molecules using the PfizerFragmenter.

        Args:
            molecules: The list of molecules which should be processed by this component.
            toolkit_registry: The openff.toolkit.utils.ToolkitRegistry which declares the available toolkits.

        Note:
            * If the input molecule fails fragmentation it will be fail this component and be removed.
        """
        from openff.fragmenter.fragment import PfizerFragmenter

        result = self._create_result(toolkit_registry=toolkit_registry)

        for molecule in molecules:
            fragment_factory = PfizerFragmenter()

            try:
                fragment_result = fragment_factory.fragment(
                    molecule=molecule,
                    toolkit_registry=toolkit_registry,
                    target_bond_smarts=self.target_torsion_smarts,
                )
                self._process_fragments(
                    fragments=fragment_result, component_result=result
                )

            except (RuntimeError, ValueError):
                # this will catch cmiles errors for molecules with undefined stero
                result.filter_molecule(molecule)

        return result