# Source code for graphein.molecule.features.graph.molecule

"""Functions for featurising Small Molecule Graphs."""
# %%
# Graphein
# Author: Arian Jamasb <arian@jamasb.io>
# License: MIT
# Project Website: https://github.com/a-r-j/graphein
# Code Repository: https://github.com/a-r-j/graphein
from __future__ import annotations

from typing import Dict, List, Optional, Union

import networkx as nx
import numpy as np
import pandas as pd

from graphein.utils.utils import import_message

try:
    from rdkit import Chem
    from rdkit.Chem import Descriptors
except ImportError:
    import_message(
        "graphein.molecule.features.graph.molecule", "rdkit", "rdkit", True
    )


def mol_descriptors(
    g: nx.Graph,
    descriptor_list: Optional[List[str]] = None,
    return_array: bool = False,
    return_series: bool = False,
) -> Union[np.ndarray, pd.Series, Dict[str, Union[float, int]]]:
    """Adds global molecular descriptors to the graph.

    The descriptor functions are taken from ``rdkit.Chem.Descriptors.descList``
    and evaluated on the molecule stored under ``g.graph["rdmol"]``.

    The result is also written onto the graph:

    * ``return_array=True``: stored as ``g.graph["descriptors"]``.
    * ``return_series=True``: stored as ``g.graph["descriptors"]``.
    * otherwise: each descriptor is merged into ``g.graph`` under its own
      name (``g.graph.update``).

    If both ``return_array`` and ``return_series`` are ``True``,
    ``return_array`` takes precedence.

    :param g: The graph to add the descriptors to. Must carry the molecule
        under ``g.graph["rdmol"]``.
    :type g: nx.Graph
    :param descriptor_list: The list of descriptors to add. If ``None``, all
        descriptors are added. Names not present in ``Descriptors.descList``
        are ignored. Output order follows ``Descriptors.descList``, not the
        order of this list.
    :type descriptor_list: Optional[List[str]]
    :param return_array: If ``True``, the descriptors are returned as a
        ``np.ndarray``.
    :type return_array: bool
    :param return_series: If ``True``, the descriptors are returned as a
        ``pd.Series``.
    :type return_series: bool
    :return: The descriptors as a dictionary (default) ``np.ndarray`` or
        ``pd.Series``.
    :rtype: Union[np.ndarray, pd.Series, Dict[str, Union[float, int]]]
    """
    mol = g.graph["rdmol"]

    # Retrieve list of possible descriptors as a name -> function mapping.
    descriptors = {d[0]: d[1] for d in Descriptors.descList}

    # Subset descriptors to those provided.
    if descriptor_list is not None:
        descriptors = {
            k: v for k, v in descriptors.items() if k in descriptor_list
        }

    # Compute descriptors.
    desc = {name: func(mol) for name, func in descriptors.items()}

    # Process output format.
    if return_array:
        # Materialise the values first: ``np.array`` on a ``dict_values`` view
        # yields a 0-d object array instead of a 1-D vector of values.
        desc = np.array(list(desc.values()))
        g.graph["descriptors"] = desc
    elif return_series:
        desc = pd.Series(desc)
        g.graph["descriptors"] = desc
    else:
        g.graph.update(desc)

    return desc