Source code for openff.toolkit.utils.utils

"""
Utility subroutines

"""

__all__ = [
    "requires_package",
    "inherit_docstrings",
    "all_subclasses",
    "temporary_cd",
    "get_data_file_path",
    "unit_to_string",
    "quantity_to_string",
    "string_to_unit",
    "string_to_quantity",
    "object_to_quantity",
    "serialize_numpy",
    "deserialize_numpy",
    "convert_all_quantities_to_string",
    "convert_all_strings_to_quantity",
    "convert_0_1_smirnoff_to_0_2",
    "convert_0_2_smirnoff_to_0_3",
    "get_molecule_parameterIDs",
]

import contextlib
import functools
import logging
from typing import TYPE_CHECKING, Any, Iterable, TypeVar, Union, overload

import numpy
import numpy as np
import pint
from openff.units import Quantity, Unit, unit
from openff.utilities import requires_package

if TYPE_CHECKING:
    from openff.toolkit import ForceField, Molecule

logger = logging.getLogger(__name__)


[docs]def inherit_docstrings(cls):
    """Inherit docstrings from parent class"""
    from inspect import getmembers, isfunction

    for name, func in getmembers(cls, isfunction):
        if func.__doc__:
            continue
        for parent in cls.__mro__[1:]:
            if hasattr(parent, name):
                func.__doc__ = getattr(parent, name).__doc__
    return cls


T = TypeVar("T")


[docs]def all_subclasses(cls: type[T]) -> list[type[T]]:
    """Recursively retrieve all subclasses of the specified class"""
    return cls.__subclasses__() + [
        g for s in cls.__subclasses__() for g in all_subclasses(s)
    ]


[docs]@contextlib.contextmanager
def temporary_cd(dir_path):
    """Context to temporary change the working directory.

    Parameters
    ----------

    dir_path
        The directory path to enter within the context

    Examples
    --------

    >>> dir_path = '/tmp'
    >>> with temporary_cd(dir_path):
    ...     pass  # do something in dir_path

    """
    import os

    prev_dir = os.getcwd()
    os.chdir(os.path.abspath(dir_path))
    try:
        yield
    finally:
        os.chdir(prev_dir)


[docs]def get_data_file_path(relative_path: str) -> str:
    """Get the full path to one of the reference files in testsystems.
    In the source distribution, these files are in ``openff/toolkit/data/``,
    but on installation, they're moved to somewhere in the user's python
    site-packages directory.

    Parameters
    ----------

    relative_path
        Name of the file to load (with respect to `openff/toolkit/data/`)

    """
    from importlib.resources import files

    _DATA_ROOT = files("openff.toolkit") / "data"

    file_path = _DATA_ROOT / relative_path

    if not file_path.exists():  # type: ignore[attr-defined]
        raise ValueError(
            f"Sorry! {file_path} does not exist. If you just added it, you'll have to re-install"
        )

    return str(file_path)


@pint.register_unit_format("simple")
def format_unit_simple(unit, registry, **options):
    return " * ".join(f"{u} ** {p}" for u, p in unit.items())


[docs]def unit_to_string(input_unit: Unit) -> str:
    return f"{input_unit:simple}"


def quantity_to_dict(input_quantity):
    value = input_quantity.magnitude
    if isinstance(value, np.ndarray):
        value = value.tolist()

    return {
        "value": value,
        "unit": str(input_quantity.units),
    }


def dict_to_quantity(input_dict):
    return input_dict["value"] * Unit(input_dict["unit"])


def quantity_to_string(input_quantity: Quantity) -> str:
    """
    Serialize a ``openff.units.Quantity`` to a string representation that is backwards-compatible
    with older versions of the OpenFF Toolkit. This includes a " * " between numerical values and
    their units and "A" being used in place of the unicode Å ("\N{ANGSTROM SIGN}").

    Parameters
    ----------
    input_quantity
        The quantity to serialize

    Returns
    -------
    output_string
        The serialized quantity

    """
    unitless_value = input_quantity.m_as(input_quantity.units)
    # The string representation of a numpy array doesn't have commas and breaks the
    # parser, thus we convert any arrays to list here
    if isinstance(unitless_value, np.ndarray):
        unitless_value = list(unitless_value)

    unit_string = unit_to_string(input_quantity.units)

    return f"{unitless_value} * {unit_string}"


def string_to_unit(unit_string):
    """
    Deserializes a ``openff.units.Quantity`` from a string representation, for
    example: "kilocalories_per_mole / angstrom ** 2"


    Parameters
    ----------
    unit_string
        Serialized representation of a ``openff.units.Quantity``.

    Returns
    -------
    output_unit
        The deserialized unit from the string
    """
    return Unit(unit_string)


def string_to_quantity(quantity_string) -> Union[str, int, float, Quantity]:
    """Attempt to parse a string into a ``Quantity``.

    Note that dimensionless floats and ints are returns as floats or ints, not ``Quantity`` objects.
    """

    from tokenize import TokenError

    from pint import UndefinedUnitError

    try:
        quantity = Quantity(quantity_string)
    except (TokenError, UndefinedUnitError):
        return quantity_string

    # TODO: Should intentionally unitless array-likes be Quantity objects
    #       or their raw representation?
    if (quantity.units == unit.dimensionless) and isinstance(quantity.m, (int, float)):
        return quantity.m
    else:
        return quantity


def convert_all_strings_to_quantity(
    smirnoff_data: dict,
    ignore_keys: Iterable[str] = tuple(),
) -> dict:
    """
    Traverses a SMIRNOFF data structure, attempting to convert all
    quantity-defining strings into ``openff.units.Quantity`` objects.

    Integers and floats are ignored and not converted into a dimensionless
    ``openff.units.Quantity`` object.

    Some good keys to ignore include `ignore_keys=["smirks", "name", "id", "parent_id"]`
    since these are commonly used in SMIRNOFF force fields in ways that are meant to
    remain strings. For more context, see `Issue #1635`_.

    .. _Issue #1635: https://github.com/openforcefield/openff-toolkit/issues/1635.

    Parameters
    ----------
    smirnoff_data
        A hierarchical dict structured in compliance with the SMIRNOFF spec
    ignore_keys
        A list of keys to skip when converting strings to quantities

    Returns
    -------
    converted_smirnoff_data
        A hierarchical dict structured in compliance with the SMIRNOFF spec,
        with quantity-defining strings converted to ``openff.units.Quantity`` objects
    """
    from pint import DefinitionSyntaxError

    if isinstance(smirnoff_data, dict):
        for key, value in smirnoff_data.items():
            if key in ignore_keys:
                smirnoff_data[key] = value
            else:
                smirnoff_data[key] = convert_all_strings_to_quantity(
                    value,
                    ignore_keys=ignore_keys,
                )
        obj_to_return = smirnoff_data

    elif isinstance(smirnoff_data, list):
        for index, item in enumerate(smirnoff_data):
            smirnoff_data[index] = convert_all_strings_to_quantity(
                item,
                ignore_keys=ignore_keys,
            )
        obj_to_return = smirnoff_data

    elif isinstance(smirnoff_data, int) or isinstance(smirnoff_data, float):
        obj_to_return = smirnoff_data

    else:
        try:
            obj_to_return = object_to_quantity(smirnoff_data)
        except (TypeError, DefinitionSyntaxError):
            obj_to_return = smirnoff_data

    return obj_to_return


@overload
def convert_all_quantities_to_string(  # noqa: E704
    smirnoff_data: list[Quantity],
) -> list[str]: ...


@overload
def convert_all_quantities_to_string(  # noqa: E704
    smirnoff_data: dict,
) -> Union[list[str], dict[str, Any]]: ...


@overload
def convert_all_quantities_to_string(  # noqa: E704
    smirnoff_data: "Quantity",
) -> Union[str, list[str], dict[str, Any]]: ...


def convert_all_quantities_to_string(
    smirnoff_data: Union[dict, str, Quantity, list]
) -> Union[str, dict[str, Any], list[str]]:
    """
    Traverses a SMIRNOFF data structure, attempting to convert all
    quantities into strings.

    Parameters
    ----------
    smirnoff_data
        A hierarchical dict structured in compliance with the SMIRNOFF spec

    Returns
    -------
    converted_smirnoff_data
        A hierarchical dict structured in compliance with the SMIRNOFF spec,
        with ``openff.units.Quantity``s converted to string
    """

    if isinstance(smirnoff_data, dict):
        for key, value in smirnoff_data.items():
            smirnoff_data[key] = convert_all_quantities_to_string(value)
        return smirnoff_data
    elif isinstance(smirnoff_data, list):
        for index, item in enumerate(smirnoff_data):
            smirnoff_data[index] = convert_all_quantities_to_string(item)
        return smirnoff_data
    elif isinstance(smirnoff_data, Quantity):
        return quantity_to_string(smirnoff_data)
    else:
        return smirnoff_data


@functools.singledispatch
def object_to_quantity(object):
    """
    Attempts to turn the provided object into `openff.units.Quantity`s.

    Can handle float, int, str, `Quantity`, `openmm.unit.Quantity`, or iterators over
    the same. Raises an exception if unable to convert all inputs.

    Parameters
    ----------
    object
        The object to convert to a ``Quantity`` object(s).

    Returns
    -------
    converted_object

    """
    # If we can't find a custom type, we treat this as a generic iterator.
    return [object_to_quantity(sub_obj) for sub_obj in object]


@object_to_quantity.register(Quantity)
def _(obj):
    return obj


@object_to_quantity.register(str)
def _(obj):
    import pint

    try:
        return string_to_quantity(obj)
    except pint.errors.UndefinedUnitError:
        raise ValueError


@object_to_quantity.register(int)
@object_to_quantity.register(float)
def _(obj):
    return Quantity(obj)


try:
    import openmm.unit
    from openff.units.openmm import from_openmm

    @object_to_quantity.register(openmm.unit.Quantity)
    def _(obj):
        return from_openmm(obj)

except ImportError:
    pass  # pragma: nocover


def serialize_numpy(np_array) -> tuple[bytes, tuple[int]]:
    """
    Serializes a numpy array into a big-endian bytestring and tuple representing its shape.

    Parameters
    ----------
    np_array
        Input numpy array

    Returns
    -------
    serialized
        A big-endian bytestring of the NumPy array.
    shape
        The shape of the serialized array
    """
    import numpy as np

    bigendian_float = np.dtype(float).newbyteorder(">")
    bigendian_array = np_array.astype(bigendian_float)
    serialized = bigendian_array.tobytes()
    shape = np_array.shape
    return serialized, shape


def deserialize_numpy(
    serialized_np: Union[bytes, list],
    shape: tuple[int, ...],
) -> numpy.ndarray:
    """
    Deserializes a numpy array from a bytestring or list. The input, if a bytestring, is
    assumed to be in big-endian byte order.

    Parameters
    ----------
    serialized_np
        A byte or list serialized representation of a numpy array
    shape
        The shape of the serialized array

    Returns
    -------
    np_array
        The deserialized numpy array
    """

    import numpy as np

    if isinstance(serialized_np, list):
        np_array = np.array(serialized_np)
    if isinstance(serialized_np, bytes):
        dt = np.dtype("float").newbyteorder(">")
        np_array = np.frombuffer(serialized_np, dtype=dt)
    np_array = np_array.reshape(shape)
    return np_array


[docs]def convert_0_2_smirnoff_to_0_3(smirnoff_data_0_2):
    """
    Convert an 0.2-compliant SMIRNOFF dict to an 0.3-compliant one.
    This involves removing units from header tags and adding them
    to attributes of child elements.
    It also requires converting ProperTorsions and ImproperTorsions
    potentials from "charmm" to "fourier".

    Parameters
    ----------
    smirnoff_data_0_2
        Hierarchical dict representing a SMIRNOFF data structure according the the 0.2 spec

    Returns
    -------
    smirnoff_data_0_3
        Hierarchical dict representing a SMIRNOFF data structure according the the 0.3 spec
    """
    # Legacy force fields sometimes specify the NonbondedForce's sigma_unit value, but then provide
    # atom size as rmin_half. Here we correct for this behavior by explicitly defining both as
    # the same unit if either one is defined.
    if "vdW" in smirnoff_data_0_2["SMIRNOFF"].keys():
        rmh_unit = smirnoff_data_0_2["SMIRNOFF"]["vdW"].get("rmin_half_unit", None)
        sig_unit = smirnoff_data_0_2["SMIRNOFF"]["vdW"].get("sigma_unit", None)
        if (rmh_unit is not None) and (sig_unit is None):
            smirnoff_data_0_2["SMIRNOFF"]["vdW"]["sigma_unit"] = rmh_unit
        elif (sig_unit is not None) and (rmh_unit is None):
            smirnoff_data_0_2["SMIRNOFF"]["vdW"]["rmin_half_unit"] = sig_unit
        # If both are None, or both are defined, don't overwrite anything
        else:
            pass

    # Recursively attach unit strings
    smirnoff_data = recursive_attach_unit_strings(smirnoff_data_0_2, {})

    # Change TorsionHandler potential from "charmm" to "k*(1+cos(periodicity*theta-phase))". Note that, scientifically,
    # we should have used "k*(1+cos(periodicity*theta-phase))" all along, since "charmm" technically
    # implies that we would support a harmonic potential for torsion terms with periodicity 0
    # More at: https://github.com/openforcefield/openff-toolkit/issues/303#issuecomment-490156779
    if "ProperTorsions" in smirnoff_data["SMIRNOFF"]:
        if "potential" in smirnoff_data["SMIRNOFF"]["ProperTorsions"]:
            if smirnoff_data["SMIRNOFF"]["ProperTorsions"]["potential"] == "charmm":
                smirnoff_data["SMIRNOFF"]["ProperTorsions"][
                    "potential"
                ] = "k*(1+cos(periodicity*theta-phase))"
    if "ImproperTorsions" in smirnoff_data["SMIRNOFF"]:
        if "potential" in smirnoff_data["SMIRNOFF"]["ImproperTorsions"]:
            if smirnoff_data["SMIRNOFF"]["ImproperTorsions"]["potential"] == "charmm":
                smirnoff_data["SMIRNOFF"]["ImproperTorsions"][
                    "potential"
                ] = "k*(1+cos(periodicity*theta-phase))"

    # Add per-section tag
    sections_not_to_version_0_3 = ["Author", "Date", "version", "aromaticity_model"]
    for l1_tag in smirnoff_data["SMIRNOFF"].keys():
        if l1_tag not in sections_not_to_version_0_3:
            if smirnoff_data["SMIRNOFF"][l1_tag] is None:
                # Handle empty entries, such as the ToolkitAM1BCC handler.
                smirnoff_data["SMIRNOFF"][l1_tag] = {}

            smirnoff_data["SMIRNOFF"][l1_tag]["version"] = 0.3

    # Update top-level tag
    smirnoff_data["SMIRNOFF"]["version"] = 0.3

    return smirnoff_data


[docs]def convert_0_1_smirnoff_to_0_2(smirnoff_data_0_1):
    """
    Convert an 0.1-compliant SMIRNOFF dict to an 0.2-compliant one.
    This involves renaming several tags, adding Electrostatics and ToolkitAM1BCC tags, and
    separating improper torsions into their own section.

    Parameters
    ----------
    smirnoff_data_0_1
        Hierarchical dict representing a SMIRNOFF data structure according the the 0.1 spec

    Returns
    -------
    smirnoff_data_0_2
        Hierarchical dict representing a SMIRNOFF data structure according the the 0.2 spec
    """
    smirnoff_data = smirnoff_data_0_1.copy()

    l0_replacement_dict = {"SMIRFF": "SMIRNOFF"}
    l1_replacement_dict = {
        "HarmonicBondForce": "Bonds",
        "HarmonicAngleForce": "Angles",
        "PeriodicTorsionForce": "ProperTorsions",
        "NonbondedForce": "vdW",
    }
    for old_l0_tag, new_l0_tag in l0_replacement_dict.items():
        # Convert first-level smirnoff_data tags.
        # Right now this just changes the SMIRFF tag to SMIRNOFF
        if old_l0_tag in smirnoff_data.keys():
            smirnoff_data[new_l0_tag] = smirnoff_data[old_l0_tag]
            del smirnoff_data[old_l0_tag]

    # SMIRFF tag will have been converted to SMIRNOFF here
    # Convert second-level tags here
    for old_l1_tag, new_l1_tag in l1_replacement_dict.items():
        if old_l1_tag in smirnoff_data["SMIRNOFF"].keys():
            smirnoff_data["SMIRNOFF"][new_l1_tag] = smirnoff_data["SMIRNOFF"][
                old_l1_tag
            ]
            del smirnoff_data["SMIRNOFF"][old_l1_tag]

    # Add 'potential' field to each l1 tag
    default_potential = {
        "Bonds": "harmonic",
        "Angles": "harmonic",
        "ProperTorsions": "charmm",
        # Note that "charmm" isn't actually correct, and was later changed
        # in the 0.3 spec. More info at
        # https://github.com/openforcefield/openff-toolkit/pull/311#commitcomment-33494506
        "vdW": "Lennard-Jones-12-6",
    }
    for l1_tag in smirnoff_data["SMIRNOFF"].keys():
        if l1_tag in default_potential.keys():
            # Ensure that it isn't there already (shouldn't happen, but better to be safe)
            if "potential" in smirnoff_data["SMIRNOFF"][l1_tag].keys():
                assert smirnoff_data[l1_tag].keys == default_potential[l1_tag]
                continue
            # Issue an informative warning about assumptions made during conversion.
            logger.warning(
                f"0.1 SMIRNOFF spec file does not contain 'potential' attribute for '{l1_tag}' tag. "
                f"The SMIRNOFF spec converter is assuming it has a value of '{default_potential[l1_tag]}'"
            )
            smirnoff_data["SMIRNOFF"][l1_tag]["potential"] = default_potential[l1_tag]

    # Separate improper torsions from propers
    if "ProperTorsions" in smirnoff_data["SMIRNOFF"]:
        if "Improper" in smirnoff_data["SMIRNOFF"]["ProperTorsions"]:
            # First generate an ImproperTorsions header, taking the relevant values from the ProperTorsions header
            improper_section = {
                "k_unit": smirnoff_data["SMIRNOFF"]["ProperTorsions"]["k_unit"],
                "phase_unit": smirnoff_data["SMIRNOFF"]["ProperTorsions"]["phase_unit"],
                "potential": smirnoff_data["SMIRNOFF"]["ProperTorsions"]["potential"],
                "Improper": smirnoff_data["SMIRNOFF"]["ProperTorsions"]["Improper"],
            }

            # Then, attach the newly-made ImproperTorsions section
            smirnoff_data["SMIRNOFF"]["ImproperTorsions"] = improper_section
            del smirnoff_data["SMIRNOFF"]["ProperTorsions"]["Improper"]

    # Add Electrostatics tag, setting several values to their defaults and
    # warning about assumptions that are being made
    electrostatics_section = {
        "method": "PME",
        "scale12": 0.0,
        "scale13": 0.0,
        "scale15": 1.0,
        "cutoff": 9.0,
        "cutoff_unit": "angstrom",
    }
    logger.warning(
        "0.1 SMIRNOFF spec did not allow the 'Electrostatics' tag. Adding it in 0.2 spec conversion, and "
        "assuming the following values:"
    )
    for key, val in electrostatics_section.items():
        logger.warning(f"\t{key}: {val}")

    # Take electrostatics 1-4 scaling term from 0.1 spec's NonBondedForce tag
    electrostatics_section["scale14"] = smirnoff_data["SMIRNOFF"]["vdW"][
        "coulomb14scale"
    ]
    del smirnoff_data["SMIRNOFF"]["vdW"]["coulomb14scale"]
    smirnoff_data["SMIRNOFF"]["Electrostatics"] = electrostatics_section

    # Change vdW's lj14scale to 14scale, add other scaling terms
    vdw_section_additions = {
        "method": "cutoff",
        "combining_rules": "Lorentz-Berthelot",
        "scale12": "0.0",
        "scale13": "0.0",
        "scale15": "1",
        "switch_width": "1.0",
        "switch_width_unit": "angstrom",
        "cutoff": "9.0",
        "cutoff_unit": "angstrom",
    }
    for key, val in vdw_section_additions.items():
        if key not in smirnoff_data["SMIRNOFF"]["vdW"].keys():
            logger.warning(
                f"0.1 SMIRNOFF spec file does not contain '{key}' attribute for 'NonBondedMethod/vdW'' tag. "
                f"The SMIRNOFF spec converter is assuming it has a value of '{val}'"
            )
            smirnoff_data["SMIRNOFF"]["vdW"][key] = val

    # Rename L-J 1-4 scaling term from 0.1 spec's NonBondedForce tag to vdW's scale14
    smirnoff_data["SMIRNOFF"]["vdW"]["scale14"] = smirnoff_data["SMIRNOFF"]["vdW"][
        "lj14scale"
    ]
    del smirnoff_data["SMIRNOFF"]["vdW"]["lj14scale"]

    # Add <ToolkitAM1BCC/> tag
    smirnoff_data["SMIRNOFF"]["ToolkitAM1BCC"] = {}

    # Update top-level tag
    smirnoff_data["SMIRNOFF"]["version"] = 0.2

    return smirnoff_data


def recursive_attach_unit_strings(smirnoff_data, units_to_attach):
    """
    Recursively traverse a SMIRNOFF data structure, appending "* {unit}" to values in key:value pairs
    where "key_unit":"unit_string" is present at a higher level in the hierarchy.
    This function expects all items in smirnoff_data to be formatted as strings.

    Parameters
    ----------
    smirnoff_data
        Any level of hierarchy that is part of a SMIRNOFF dict, with all data members
        formatted as string.
    units_to_attach
        Dict of the form {key:unit_string}

    Returns
    -------
    unit_appended_smirnoff_data: dict
    """
    import re

    # Make a copy of units_to_attach so we don't modify the original (otherwise things like k_unit could
    # leak between sections)
    units_to_attach = units_to_attach.copy()
    # smirnoff_data = smirnoff_data.copy()

    # If we're working with a dict, see if there are any new unit entries and store them,
    # then operate recursively on the values in the dict.
    if isinstance(smirnoff_data, dict):
        # Go over all key:value pairs once to see if there are new units to attach.
        # Note that units to be attached can be defined in the same dict as the
        # key:value pair they will be attached to, so we need to complete this check
        # before we are able to check other items in the dict.
        for key, value in list(smirnoff_data.items()):
            if key[-5:] == "_unit":
                units_to_attach[key[:-5]] = value
                del smirnoff_data[key]

        # Go through once more to attach units as appropriate
        for key in smirnoff_data.keys():
            # We use regular expressions to catch possible indexed attributes
            attach_unit = None
            for unit_key, unit_string in units_to_attach.items():
                if re.match(f"{unit_key}[0-9]*", key):
                    attach_unit = unit_string

            if attach_unit is not None:
                smirnoff_data[key] = str(smirnoff_data[key]) + " * " + attach_unit

            # And recursively act on value, in case it's a deeper level of hierarchy
            smirnoff_data[key] = recursive_attach_unit_strings(
                smirnoff_data[key], units_to_attach
            )

    # If it's a list, operate on each member of the list
    elif isinstance(smirnoff_data, list):
        for index, value in enumerate(smirnoff_data):
            smirnoff_data[index] = recursive_attach_unit_strings(value, units_to_attach)

    # Otherwise, just return smirnoff_data unchanged
    else:
        pass

    return smirnoff_data


[docs]def get_molecule_parameterIDs(
    molecules: list["Molecule"], forcefield: "ForceField"
) -> tuple[dict, dict]:
    """Process a list of molecules with a specified SMIRNOFF ffxml file and determine which parameters are used by
    which molecules, returning collated results.

    Parameters
    ----------
    molecules
        List of molecules (with explicit hydrogens) to parse
    forcefield
        The ForceField to apply

    Returns
    -------
    parameters_by_molecule
        Parameter IDs used in each molecule, keyed by isomeric SMILES
        generated from provided OEMols. Each entry in the dict is a list
        which does not necessarily have unique entries; i.e. parameter IDs
        which are used more than once will occur multiple times.

    parameters_by_ID
        Molecules in which each parameter ID occur, keyed by parameter ID.
        Each entry in the dict is a set of isomeric SMILES for molecules
        in which that parameter occurs. No frequency information is stored.

    """

    from openff.toolkit.topology import Topology

    # Create storage
    parameters_by_molecule: dict[str, list] = dict()
    parameters_by_ID: dict[str, set[str]] = dict()

    # Generate isomeric SMILES for each molecule, ensuring all molecules are unique
    isosmiles = [molecule.to_smiles() for molecule in molecules]
    already_seen = set()
    duplicates = set(
        smiles
        for smiles in isosmiles
        if smiles in already_seen or already_seen.add(smiles)  # type: ignore[func-returns-value]
    )
    if len(duplicates) > 0:
        raise ValueError(
            "Error: get_molecule_parameterIDs has been provided a list of oemols which contains some duplicates: "
            f"{duplicates}"
        )

    # Assemble molecules into a Topology
    topology = Topology()
    for molecule in molecules:
        topology.add_molecule(molecule)

    # Label molecules
    labels = forcefield.label_molecules(topology)

    # Organize labels into output dictionary by looping over all molecules/smiles
    for idx in range(len(isosmiles)):
        # Pull smiles, initialize storage
        smi = isosmiles[idx]
        parameters_by_molecule[smi] = []

        # Organize data for this molecule
        data = labels[idx]
        for force_type in data.keys():
            for parameter_type in data[force_type].values():
                pid = parameter_type.id
                # Store pid to molecule
                parameters_by_molecule[smi].append(pid)

                # Store which molecule this pid occurred in
                if pid not in parameters_by_ID:
                    parameters_by_ID[pid] = set()
                    parameters_by_ID[pid].add(smi)
                else:
                    parameters_by_ID[pid].add(smi)

    return parameters_by_molecule, parameters_by_ID


def sort_smirnoff_dict(data):
    """
    Recursively sort the keys in a dict of SMIRNOFF data.

    Adapted from https://stackoverflow.com/a/47882384/4248961

    TODO: Should this live elsewhere?
    """
    sorted_dict = dict()
    for key, val in sorted(data.items()):
        if isinstance(val, dict):
            # This should hit each ParameterHandler and dicts within them
            sorted_dict[key] = sort_smirnoff_dict(val)
        elif isinstance(val, list):
            # Handle case of ParameterLists, which show up in
            # the smirnoff dicts as lists of dicts
            new_parameter_list = list()
            for param in val:
                new_parameter_list.append(sort_smirnoff_dict(param))
            sorted_dict[key] = new_parameter_list
        else:
            # Handle metadata or the bottom of a recursive dict
            sorted_dict[key] = val
    return sorted_dict