Source code for graphein.protein.features.utils

"""Utility functions to work with graph-level features."""
# Graphein
# Author: Arian Jamasb <arian@jamasb.io>
# License: MIT
# Project Website: https://github.com/a-r-j/graphein
# Code Repository: https://github.com/a-r-j/graphein
import networkx as nx
import numpy as np
import pandas as pd


def convert_graph_dict_feat_to_series(
    G: nx.Graph, feature_name: str
) -> nx.Graph:
    """
    Replace a graph-level feature with its ``pd.Series`` representation.

    Some graph-level features are produced as dictionaries; wrapping them in
    a ``pd.Series`` gives every such feature a common container type. The
    graph is modified in place and is also returned for convenience.

    :param G: Graph whose ``G.graph[feature_name]`` entry holds the value to
        convert (typically a ``Dict[Any, Any]``).
    :type G: nx.Graph
    :param feature_name: Key of the graph-level feature to convert to a
        ``pd.Series``.
    :type feature_name: str
    :raises KeyError: If ``feature_name`` is not present in ``G.graph``.
    :return: The same graph, with ``G.graph[feature_name]`` now holding a
        ``pd.Series``.
    :rtype: nx.Graph
    """
    raw_feature = G.graph[feature_name]
    G.graph[feature_name] = pd.Series(raw_feature)
    return G
def aggregate_graph_feature_over_chains(
    G: nx.Graph, feature_name: str, aggregation_type: str
) -> nx.Graph:
    """
    Combine a per-chain graph feature into one aggregated graph feature.

    For every chain ID ``c`` in ``G.graph["chain_ids"]`` the value stored at
    ``G.graph[f"{feature_name}_{c}"]`` is collected into a list, and that list
    is reduced with the NumPy function matching ``aggregation_type``. E.g.
    sums/averages/min/max of molecular weights across chains. The graph is
    modified in place and is also returned.

    NOTE(review): the reducer is called with NumPy's default ``axis=None``,
    so array-valued chain features are reduced over all elements rather than
    element-wise across chains -- confirm this is the intended behaviour.

    :param G: Graph of a protein containing chain-specific features and a
        ``"chain_ids"`` graph attribute.
    :type G: nx.Graph
    :param feature_name: Name (prefix) of the per-chain features to
        aggregate.
    :type feature_name: str
    :param aggregation_type: Type of aggregation to perform (``"min"``,
        ``"max"``, ``"sum"``, ``"mean"``).
    :type aggregation_type: str
    :raises NameError: If ``aggregation_type`` is not one of ``"min"``,
        ``"max"``, ``"sum"``, ``"mean"``.
    :raises KeyError: If ``"chain_ids"`` or any per-chain feature is missing
        from ``G.graph``.
    :return: The same graph with a new aggregated feature stored at
        ``G.graph[f"{feature_name}_{aggregation_type}"]``.
    :rtype: nx.Graph
    """
    # Scanned in order with ``==`` (rather than a dict lookup) so that any
    # value of ``aggregation_type`` -- even an unhashable one -- ends up in
    # the same NameError branch.
    supported_reducers = (
        ("mean", np.mean),
        ("max", np.max),
        ("sum", np.sum),
        ("min", np.min),
    )
    reducer = next(
        (fn for label, fn in supported_reducers if aggregation_type == label),
        None,
    )
    # Reject unsupported aggregations before touching the graph.
    if reducer is None:
        raise NameError(
            "Unsupported aggregation type. Please use mean, max, sum or min."
        )

    per_chain_values = [
        G.graph[f"{feature_name}_{chain}"] for chain in G.graph["chain_ids"]
    ]
    G.graph[f"{feature_name}_{aggregation_type}"] = reducer(per_chain_values)
    return G