# Source code for openff.nagl.utils.resonance

```
"""Enumerate resonance forms of a molecule"""
import copy
import itertools
import json
from typing import Dict, Optional, List, Generator, Tuple, Any, Union
import networkx as nx
import numpy as np
from openff.units import unit
from openff.toolkit.topology import Molecule
from openff.nagl.utils._types import ResonanceType, ResonanceAtomType
__all__ = ["ResonanceEnumerator", "enumerate_resonance_forms"]
[docs]def enumerate_resonance_forms(
molecule: Molecule,
lowest_energy_only: bool = True,
max_path_length: Optional[int] = None,
include_all_transfer_pathways: bool = False,
as_dicts: bool = False,
) -> List[Union[Molecule, Dict[str, Dict[str, Any]]]]:
"""
Recursively attempts to find all resonance structures of an input molecule
according to a modified version of the algorithm proposed by Gilson et al [1].
Enumeration proceeds by:
1. The molecule is turned into a ``networkx`` graph object.
2. All hydrogen's and uncharged sp3 carbons are removed from the graph as these
will not be involved in electron transfer.
3. Disjoint sub-graphs are detected and separated out.
4. Sub-graphs that don't contain at least 1 donor and 1 acceptor are discarded
5. For each disjoint subgraph:
a) The original v-charge algorithm is applied to yield the resonance structures
of that subgraph.
This will lead to ``M_i`` resonance structures for each of the ``N`` sub-graphs.
If ``as_dicts=True`` then the resonance states in each sub-graph are returned. This
avoids the need to combinatorially combining resonance information from each
sub-graph. When ``as_dicts=False``, all ``M_0 x M_1 x ... x M_N`` forms are fully
enumerated and return as molecule objects matching the input molecule type.
.. note::
* This method will strip all stereochemistry and aromaticity information from
the input molecule.
* The method only attempts to enumerate resonance forms that occur when a
pair of electrons can be transferred along a conjugated path from a donor to
an acceptor. Other types of resonance, e.g. different Kekule structures, are
not enumerated.
Parameters
----------
molecule: openff.toolkit.topology.Molecule
The molecule to enumerate resonance forms for.
lowest_energy_only: bool, optional
Whether to only return the resonance forms with the lowest
'energy' as defined in [1].
max_path_length: int, optional
The maximum number of bonds between a donor and acceptor to
consider. If None, all paths are considered.
as_dicts: bool, optional
Whether to return the resonance forms in a form that is more
compatible with producing feature vectors. If false, all combinatorial
resonance forms will be returned which may be significantly slow if the
molecule is very heavily conjugated and has many donor / acceptor pairs.
include_all_transfer_pathways: bool, optional
Whether to include resonance forms that have
the same formal charges but have different arrangements of bond orders. Such
cases occur when there exists multiple electron transfer pathways between
electron donor-acceptor pairs e.g. in cyclic systems.
References
----------
[1] Gilson, Michael K., Hillary SR Gilson, and Michael J. Potter. "Fast
assignment of accurate partial atomic charges: an electronegativity
equalization method that accounts for alternate resonance forms." Journal of
chemical information and computer sciences 43.6 (2003): 1982-1997.
Returns
-------
resonance_forms: List[Molecule]
A list of all resonance forms including the original molecule.
"""
enumerator = ResonanceEnumerator(molecule)
return enumerator.enumerate_resonance_forms(
lowest_energy_only=lowest_energy_only,
max_path_length=max_path_length,
include_all_transfer_pathways=include_all_transfer_pathways,
as_dicts=as_dicts,
)
[docs]class ResonanceEnumerator:
"""
A convenience class for enumerating resonance forms of a molecule
according to the algorithm proposed by Gilson et al [1].
Enumeration proceeds by:
1. The molecule is turned into a ``networkx`` graph object at :attr:`ResonanceEnumerator.graph`.
2. All hydrogen's and uncharged sp3 carbons are removed from the graph as these
will not be involved in electron transfer, yielding :attr:`ResonanceEnumerator.reduced_graph`.
3. Disjoint sub-graphs are detected and separated out.
4. Sub-graphs that don't contain at least 1 donor and 1 acceptor are discarded
5. For each disjoint subgraph:
a) The ``networkx`` graph is converted to a :class:`FragmentEnumerator`
b) The original v-charge algorithm is applied to yield the resonance structures
of that subgraph, using :meth:`FragmentEnumerator.enumerate_resonance_forms`
"""
def __init__(self, molecule: Molecule):
self.molecule = molecule
self.graph = self._convert_molecule_to_graph(molecule)
self.reduced_graph = self._reduce_graph(self.graph, inplace=False)
[docs] def enumerate_resonance_forms(
self,
lowest_energy_only: bool = True,
max_path_length: Optional[int] = None,
include_all_transfer_pathways: bool = False,
as_dicts: bool = False,
) -> List[Union[Molecule, Dict[str, Dict[str, Any]]]]:
"""
Recursively attempts to find all resonance structures of an input molecule
according to a modified version of the algorithm proposed by Gilson et al [1].
Enumeration proceeds by:
1. The molecule is turned into a ``networkx`` graph object.
2. All hydrogen's and uncharged sp3 carbons are removed from the graph as these
will not be involved in electron transfer.
3. Disjoint sub-graphs are detected and separated out.
4. Sub-graphs that don't contain at least 1 donor and 1 acceptor are discarded
5. For each disjoint subgraph:
a) The original v-charge algorithm is applied to yield the resonance structures
of that subgraph.
Parameters
----------
lowest_energy_only: bool, optional
Whether to only return the resonance forms with the lowest
'energy' as defined in [1].
max_path_length: int, optional
The maximum number of bonds between a donor and acceptor to
consider. If None, all paths are considered.
as_dicts: bool, optional
Whether to return the resonance forms as dictionaries.
If ``False``, all combinatorial
resonance forms will be returned as molecule objects.
This may be significantly slow if the
molecule is very heavily conjugated and has many donor / acceptor pairs.
If ``True`` then the resonance states in each sub-graph are returned. This
avoids the need to combinatorially combining resonance information from each
sub-graph.
include_all_transfer_pathways: bool, optional
Whether to include resonance forms that have
the same formal charges but have different arrangements of bond orders. Such
cases occur when there exists multiple electron transfer pathways between
electron donor-acceptor pairs e.g. in cyclic systems.
References
----------
[1] Gilson, Michael K., Hillary SR Gilson, and Michael J. Potter. "Fast
assignment of accurate partial atomic charges: an electronegativity
equalization method that accounts for alternate resonance forms." Journal of
chemical information and computer sciences 43.6 (2003): 1982-1997.
Returns
-------
resonance_forms: List[Molecule]
A list of all resonance forms including the original molecule.
"""
from openff.nagl.toolkits.openff import molecule_from_networkx
all_fragments = self._enumerate_resonance_fragments(
lowest_energy_only=lowest_energy_only,
max_path_length=max_path_length,
include_all_transfer_pathways=include_all_transfer_pathways,
)
graphs = [
[fragment.reduced_graph for fragment in fragments]
for fragments in all_fragments
]
combinations = itertools.product(*graphs)
resonance_forms = [
self._substitute_resonance_fragments(combination)
for combination in combinations
]
if as_dicts:
molecules = [
self._convert_graph_to_dict(resonance_form)
for resonance_form in resonance_forms
]
else:
molecules = [
molecule_from_networkx(resonance_form)
for resonance_form in resonance_forms
]
return molecules
[docs] def to_fragment(self) -> "FragmentEnumerator":
"""Convert to a FragmentEnumerator"""
graph = copy.deepcopy(self.reduced_graph)
return FragmentEnumerator(graph)
@staticmethod
def _convert_graph_to_dict(graph: nx.Graph) -> Dict[str, Dict[str, Any]]:
"""
Convert a molecule ``networkx`` graph to a dictionary
Parameters
----------
graph: :class:`networkx.Graph`
Input molecule graph
Returns
-------
molecule_dict: Dict[str, Dict[str, Any]]
A dictionary representation of the molecule.
molecule_dict["atoms"][i] contains the atom information for atom i.
molecule_dict["bonds"][(i, j)] contains the bond information for bond (i, j).
All bonds are stored in the order (i, j) where i < j.
"""
atoms = dict(graph.nodes(data=True))
bonds = {}
for i, j, info in graph.edges(data=True):
if j < i:
i, j = j, i
bonds[(i, j)] = info
return {"atoms": atoms, "bonds": bonds}
def _enumerate_resonance_fragments(
self,
lowest_energy_only: bool = True,
max_path_length: Optional[int] = None,
include_all_transfer_pathways: bool = False,
) -> List[List["FragmentEnumerator"]]:
"""
Recursively enumerate all resonance forms of disjoint subgraphs.
Parameters
----------
lowest_energy_only: bool, optional
Whether to only return the resonance forms with the lowest
'energy' as defined in [1].
max_path_length: int, optional
The maximum number of bonds between a donor and acceptor to
consider. If None, all paths are considered.
include_all_transfer_pathways: bool, optional
Whether to include resonance forms that have
the same formal charges but have different arrangements of bond orders. Such
cases occur when there exists multiple electron transfer pathways between
electron donor-acceptor pairs e.g. in cyclic systems.
Returns
-------
resonance_fragments: List[List[FragmentEnumerator]]
All enumerated disjoint subgraphs.
resonance_fragments[i][j] returns the j-th resonance form
of the i-th subgraph of the molecule.
"""
acceptor_donor_fragments = self._get_acceptor_donor_fragments()
fragment_resonance_forms = [
fragment.enumerate_resonance_forms(
lowest_energy_only=lowest_energy_only,
max_path_length=max_path_length,
include_all_transfer_pathways=include_all_transfer_pathways,
)
for fragment in acceptor_donor_fragments
]
return fragment_resonance_forms
def _substitute_resonance_fragments(
self, resonance_forms: List[nx.Graph]
) -> nx.Graph:
"""
Substitute all resonance subgraphs to
generate a new molecule graph
Parameters
----------
resonance_forms: List[nx.Graph]
All disjoint ``networkx`` graphs to substitute
Returns
-------
new_graph: nx.Graph
The new molecule graph with all resonance subgraphs
"""
graph = copy.deepcopy(self.graph)
for subgraph in resonance_forms:
self._update_graph_attributes(subgraph, graph)
return graph
@staticmethod
def _update_graph_attributes(source: nx.Graph, target: nx.Graph):
"""
Update the attributes of the nodes and edges of
a target graph with those of a source graph.
All nodes and edges in the source graph should be present
in the target.
The target graph is updated in-place.
Parameters
----------
source: :class:`networkx.Graph`
Source sub-graph
target: :class:`networkx.Graph`
Target graph
"""
for node in source.nodes:
target.nodes[node].update(source.nodes[node])
for i, j in source.edges:
target.edges[i, j].update(source.edges[i, j])
@staticmethod
def _convert_molecule_to_graph(molecule):
"""
Convert a molecule to a ``networkx`` graph
where each node has ``bond_orders`` information
Parameters
----------
molecule: :class:`openff.toolkit.topology.Molecule`
Returns
-------
graph: :class:`networkx.Graph`
"""
graph = molecule.to_networkx()
for node, atom in zip(graph.nodes, molecule.atoms):
bond_orders = tuple(sorted(bond.bond_order for bond in atom.bonds))
graph.nodes[node]["bond_orders"] = tuple(bond_orders)
return graph
@staticmethod
def _reduce_graph(graph: nx.Graph, inplace: bool = True) -> nx.Graph:
"""
Reduce a ``networkx`` graph by removing all hydrogen
and CX4 atoms
Parameters
----------
graph: :class:`networkx.Graph`
Input molecule graph with all atoms
inplace: bool
Whether to reduce the graph inplace
Returns
-------
reduced_graph: :class:`networkx.Graph`
"""
if not inplace:
graph = copy.deepcopy(graph)
nodes_to_remove = []
for index in graph.nodes:
atom_info = graph.nodes[index]
# remove hydrogens. These are implicitly captured by bond_orders
if atom_info["atomic_number"] == 1:
nodes_to_remove.append(index)
# Next prune all CX4 carbon atoms - because they have no double bonds to begin with
# there is no electron transfer that can occur to change that otherwise they will
# end up pentavalent, and so can never be part of a conjugated path
elif (
atom_info["atomic_number"] == 6
and atom_info["bond_orders"] == (1, 1, 1, 1)
and atom_info["formal_charge"] == 0
):
nodes_to_remove.append(index)
graph.remove_nodes_from(nodes_to_remove)
return graph
@staticmethod
def _fragment_networkx_graph(graph):
"""
Fragment a ``networkx`` graph into disjoint subgraphs
"""
for fragment in nx.connected_components(graph):
yield graph.subgraph(fragment)
def _get_acceptor_donor_fragments(self) -> List["FragmentEnumerator"]:
"""
Get all fragments of the molecule that contain both
electron donors and acceptors
Returns
-------
fragments: List["FragmentEnumerator"]
"""
acceptor_donor_fragments = []
for nxfragment in self._fragment_networkx_graph(self.reduced_graph):
fragment = FragmentEnumerator(nxfragment)
if fragment.acceptor_indices and fragment.donor_indices:
acceptor_donor_fragments.append(fragment)
return acceptor_donor_fragments
class FragmentEnumerator:
"""
A convenience class to enumerate resonance forms of a fragment of a molecule.
Parameters
----------
graph: nx.Graph
A ``networkx`` Graph representation of a molecule fragment.
"""
def __init__(self, graph):
self.reduced_graph = graph
self._path_cache = {}
self.resonance_types = self._get_resonance_types()
self.acceptor_indices = []
self.donor_indices = []
for index, resonance_type in self.resonance_types.items():
if resonance_type.type == ResonanceAtomType.Acceptor.value:
self.acceptor_indices.append(index)
elif resonance_type.type == ResonanceAtomType.Donor.value:
self.donor_indices.append(index)
def _get_all_odd_n_simple_paths(
self,
node_a: int,
node_b: int,
max_path_length: Optional[int] = None,
) -> Tuple[Tuple[int, ...], ...]:
"""
Get all odd length simple paths between two nodes in a graph.
Parameters
----------
node_a: int
The index of the first node.
node_b: int
The index of the second node.
max_path_length: Optional[int]
The maximum length of the paths to return. If None, all paths will be returned.
Returns
-------
all_odd_paths: Tuple[Tuple[int, ...], ...]
All odd length simple paths between the two nodes.
"""
key = (node_a, node_b)
if key in self._path_cache:
return self._path_cache[key]
all_paths = map(
tuple,
nx.all_simple_paths(
self.reduced_graph,
node_a,
node_b,
cutoff=max_path_length,
),
)
odd_paths = [path for path in all_paths if len(path) % 2 == 1]
odd_paths = tuple(sorted(odd_paths, key=len, reverse=True))
self._path_cache[key] = odd_paths
self._path_cache[key[::-1]] = odd_paths
return odd_paths
def _get_transfer_paths(
self,
donor_node: int,
acceptor_node: int,
max_path_length: Optional[int] = None,
) -> Generator[Tuple[int, ...], None, None]:
"""
Attempts to find all possible electron transfer paths, as defined by Gilson et
al [1], between a donor and an acceptor atom.
Parameters
----------
donor_node: int
The node of the donor atom
acceptor_node: int
The node of the acceptor atom
max_path_length: Optional[int]
The maximum length of the paths to search.
If None, all paths will be returned.
Returns
-------
transfer_paths: Generator[Tuple[int, ...], None, None
A list of any 'electron transfer' paths that begin from the donor atom and end
at the acceptor atom.
"""
for path in self._get_all_odd_n_simple_paths(
donor_node, acceptor_node, max_path_length
):
if self._is_transfer_path(path):
yield path
def _is_transfer_path(self, path: Tuple[int, ...]) -> bool:
"""
Determine if a path is a transfer path based on alternating bond order.
This checks whether the bonds along the path form a rising-falling sequence.
Parameters
----------
path: Tuple[int, ...]
The path to check. Each number is a node in the graph that
should be bonded to the nodes on either side.
Returns
-------
is_transfer_path: bool
Whether the path is a transfer path.
"""
edges = zip(path[:-1], path[1:])
bond_orders = np.array(
[self.reduced_graph.edges[i, j]["bond_order"] for i, j in edges]
)
deltas = bond_orders[1:] - bond_orders[:-1]
return np.all(deltas[::2] == 1) and np.all(deltas[1::2] == -1)
def _to_resonance_dict(
self, include_bonds: bool = True, include_formal_charges: bool = False
) -> Dict[str, List[int]]:
"""
A convenience method to convert a molecule fragment to a dictionary.
This is used to generate a hash of the fragment.
Parameters
----------
include_bonds: bool
Whether to include bond orders
include_formal_charges: bool
Whether to include atom formal charges
Returns
-------
dictionary: Dict[str, List[int]]
A dictionary representation of the fragment.
The keys are "acceptor_indices" and "donor_indices".
Optionally, they can include "bond_orders" and "formal_charges".
"""
hash_dict = {
"acceptor_indices": self.acceptor_indices,
"donor_indices": self.donor_indices,
}
if include_bonds:
bond_data = self.reduced_graph.edges(data="bond_order")
hash_dict["bond_orders"] = {(i, j): x for i, j, x in bond_data}
if include_formal_charges:
hash_dict["formal_charges"] = [
q.m_as(unit.elementary_charge)
for _, q in self.reduced_graph.nodes(data="formal_charge")
]
return hash_dict
def _to_resonance_json(
self, include_bonds: bool = True, include_formal_charges: bool = False
) -> str:
"""
A convenience method to convert a molecule fragment to a JSON string
Parameters
----------
include_bonds: bool
Whether to include bond orders
include_formal_charges: bool
Whether to include atom formal charges
Returns
-------
string: str
"""
resonance_dict = self._to_resonance_dict(
include_bonds=include_bonds,
include_formal_charges=include_formal_charges,
)
if include_bonds:
resonance_dict["bond_orders"] = sorted(
map(tuple, resonance_dict["bond_orders"].items())
)
return json.dumps(resonance_dict, sort_keys=True)
def _to_resonance_hash(
self, include_bonds: bool = True, include_formal_charges: bool = False
) -> bytes:
"""
A convenience method to convert a molecule fragment to a hash
Parameters
----------
include_bonds: bool
Whether to include bond orders
include_formal_charges: bool
Whether to include atom formal charges
Returns
-------
hashed_string: bytes
"""
import hashlib
json_str = self._to_resonance_json(
include_bonds=include_bonds, include_formal_charges=include_formal_charges
)
return hashlib.sha1(json_str.encode(), usedforsecurity=False).digest()
def _transfer_electrons(self, path: Tuple[int, ...]):
"""
Carries out an electron transfer along the pre-determined transfer path starting
from a donor and ending in an acceptor.
Parameters
----------
path: Tuple[int, ...]
The path along which to transfer electrons.
The first and last atoms are the donor and acceptor, respectively.
Returns
-------
new_fragment: FragmentEnumerator
The new fragment after the electron transfer
"""
graph = copy.deepcopy(self.reduced_graph)
if not len(path):
return type(self)(graph)
donor_index, acceptor_index = path[0], path[-1]
for index in [donor_index, acceptor_index]:
resonance_type = self._get_atom_resonance_type(index)
conjugate_key = resonance_type.get_conjugate_key()
charge = conjugate_key.formal_charge * unit.elementary_charge
graph.nodes[index]["formal_charge"] = charge
graph.nodes[index]["atomic_number"] = conjugate_key.atomic_number
graph.nodes[index]["bond_orders"] = conjugate_key.bond_orders
for bond_index, (i, j) in enumerate(zip(path[:-1], path[1:])):
increment = 1 if bond_index % 2 == 0 else -1
graph.edges[i, j]["bond_order"] += increment
return type(self)(graph)
def _get_atom_resonance_type(self, index: int) -> ResonanceType.Value:
"""
Get the resonance type of an atom.
Parameters
----------
node: int
Node of the atom to get resonance type of
Returns
-------
resonance_type: ResonanceType.Value
"""
node_info = self.reduced_graph.nodes[index]
charge = node_info["formal_charge"].m_as(unit.elementary_charge)
return ResonanceType.get_resonance_type(
atomic_number=node_info["atomic_number"],
formal_charge=charge,
bond_orders=node_info["bond_orders"],
)
def _get_resonance_types(self) -> Dict[int, ResonanceType.Value]:
"""Get resonance types of acceptor and donor atoms.
Returns
-------
resonance_types: Dict[int, ResonanceType.Value]
A dictionary of resonance types for each atom in the graph.
Keys are node numbers.
"""
resonance_types = {}
for index in self.reduced_graph.nodes:
try:
resonance_types[index] = self._get_atom_resonance_type(index)
except KeyError:
pass
return resonance_types
def _enumerate_donor_acceptor_resonance_forms(
self,
max_path_length: Optional[int] = None,
) -> Generator["FragmentEnumerator", None, None]:
"""Enumerate all resonance forms of this fragment
by transferring electrons
Parameters
----------
max_path_length: int, optional
The maximum length of possible transfer paths.
If None, all paths are considered.
Returns
-------
resonance_forms: Generator[FragmentEnumerator, None, None]
"""
for acceptor_index in self.acceptor_indices:
for donor_index in self.donor_indices:
for path in self._get_transfer_paths(
donor_index, acceptor_index, max_path_length
):
transferred = self._transfer_electrons(path)
yield transferred
def enumerate_resonance_forms(
self,
include_all_transfer_pathways: bool = False,
lowest_energy_only: bool = True,
max_path_length: Optional[int] = None,
) -> List["FragmentEnumerator"]:
"""
Recursively enumerate all resonance forms for a fragment
using the v-charge algorithm.
Parameters
----------
include_all_transfer_pathways: bool
Whether to include all possible transfer pathways.
If False, drop resonance forms that only differ due to bond order
rearrangements, e.g. ring resonance structures
lowest_energy_only: bool
Whether to only include the lowest energy resonance forms.
max_path_length: int, optional
The maximum length of possible transfer paths.
If None, all paths are considered.
Returns
-------
resonance_forms: List[FragmentEnumerator]
"""
self_hash = self._to_resonance_hash(include_bonds=True)
open_forms: Dict[bytes, FragmentEnumerator] = {self_hash: self}
closed_forms: Dict[bytes, FragmentEnumerator] = {}
while open_forms:
current_forms: Dict[bytes, FragmentEnumerator] = {}
for current_key, current_fragment in open_forms.items():
if current_key in closed_forms:
continue
closed_forms[current_key] = current_fragment
new_forms = current_fragment._enumerate_donor_acceptor_resonance_forms(
max_path_length=max_path_length
)
for new_fragment in new_forms:
new_key = new_fragment._to_resonance_hash(include_bonds=True)
if new_key not in closed_forms and new_key not in open_forms:
current_forms[new_key] = new_fragment
open_forms = current_forms
if not include_all_transfer_pathways:
# Drop resonance forms that only differ due to bond order re-arrangements as we
# aren't interested in e.g. ring resonance structures.
closed_forms = {
fragment._to_resonance_hash(include_bonds=False): fragment
for fragment in closed_forms.values()
}
if lowest_energy_only:
closed_forms = self._select_lowest_energy_forms(closed_forms)
graphs = list(closed_forms.values())
return graphs
@staticmethod
def _select_lowest_energy_forms(
forms: Dict[Any, "FragmentEnumerator"]
) -> Dict[Any, "FragmentEnumerator"]:
"""
Select the resonance forms with the lowest energy.
Parameters
----------
forms: Dict[Any, FragmentEnumerator]
All possible resonance forms
Returns
-------
lowest_energy_forms: Dict[Any, FragmentEnumerator]
The resonance forms with the lowest energy.
"""
energies: Dict[Any, float] = {
key: form._get_resonance_energy() for key, form in forms.items()
}
lowest = min(energies.values())
lowest_forms = {
key: forms[key]
for key, energy in energies.items()
if np.isclose(energy, lowest)
}
return lowest_forms
def _get_resonance_energy(self) -> float:
"""
Calculate the resonance energy sum of all resonance atoms in this form
Returns
-------
energy: float
"""
return sum(res.energy for res in self.resonance_types.values())
```