Source code for graphein.ppi.graphs

"""Functions for constructing a PPI graph from STRINGdb and BIOGRID."""
# %%
# Graphein
# Author: Arian Jamasb <arian@jamasb.io>, Ramon Vinas
# License: MIT
# Project Website: https://github.com/a-r-j/graphein
# Code Repository: https://github.com/a-r-j/graphein
import logging
from typing import Callable, List, Optional

import networkx as nx

from graphein.ppi.config import PPIGraphConfig
from graphein.utils.utils import (
    annotate_edge_metadata,
    annotate_graph_metadata,
    annotate_node_metadata,
    compute_edges,
)

# Module-level logger, named after this module.
log = logging.getLogger(__name__)


# Single-letter colour codes keyed by edge source name ("string" / "biogrid").
# NOTE(review): the demo under ``if __name__ == "__main__"`` uses the same
# letters as ``edge_color`` values when drawing - presumably matplotlib colour
# shorthands (red / blue); confirm before relying on it elsewhere.
EDGE_COLOR_MAPPING = {"string": "r", "biogrid": "b"}


def parse_kwargs_from_config(config: PPIGraphConfig) -> PPIGraphConfig:
    """
    Merge the STRING / BIOGRID sub-configurations into ``config.kwargs``.

    Every entry of ``config.string_config`` is copied into ``config.kwargs``
    under the key ``STRING_<name>`` and every entry of
    ``config.biogrid_config`` under ``BIOGRID_<name>``. Existing entries of
    ``config.kwargs`` are kept unless a prefixed key collides with them, in
    which case the sub-configuration value wins. If neither sub-configuration
    is set, ``config.kwargs`` is left untouched.

    :param config: PPI graph configuration object.
    :type config: PPIGraphConfig
    :return: The same ``config`` object, with ``config.kwargs`` updated.
    :rtype: PPIGraphConfig
    """
    prefixed_sources = (
        ("STRING", config.string_config),
        ("BIOGRID", config.biogrid_config),
    )
    for prefix, source_config in prefixed_sources:
        if source_config is None:
            continue
        # ``dict(...)`` accepts plain mappings as well as objects that iterate
        # as (name, value) pairs; ``.items()`` must be called on the resulting
        # dict, otherwise the comprehension would iterate over bare keys.
        prefixed = {
            f"{prefix}_{name}": value
            for name, value in dict(source_config).items()
        }
        # ``dict.update`` returns ``None``, so assigning its result would wipe
        # the kwargs. Build the merged dict explicitly instead, treating a
        # missing (``None``) kwargs as empty.
        config.kwargs = {**(config.kwargs or {}), **prefixed}
    return config
def compute_ppi_graph(
    protein_list: List[str],
    edge_construction_funcs: List[Callable],
    graph_annotation_funcs: Optional[List[Callable]] = None,
    node_annotation_funcs: Optional[List[Callable]] = None,
    edge_annotation_funcs: Optional[List[Callable]] = None,
    config: Optional[PPIGraphConfig] = None,
) -> nx.Graph:
    """
    Build a protein-protein interaction graph from a list of protein IDs.

    This is the main entry point for PPI graph construction. The steps run in
    a fixed order: nodes are created, then graph-level and node-level
    annotations are applied, then edges are constructed, and finally edges
    are annotated.

    :param protein_list: Protein identifiers; each becomes a node carrying a
        ``protein_id`` attribute equal to its identifier.
    :type protein_list: List[str]
    :param edge_construction_funcs: Functions used to add edges to the graph
        (required).
    :type edge_construction_funcs: List[Callable]
    :param graph_annotation_funcs: Functions that annotate graph-level
        metadata. Skipped when ``None``.
    :type graph_annotation_funcs: List[Callable], optional
    :param node_annotation_funcs: Functions that annotate node metadata.
        Skipped when ``None``.
    :type node_annotation_funcs: List[Callable], optional
    :param edge_annotation_funcs: Functions that annotate edge metadata.
        Skipped when ``None``.
    :type edge_annotation_funcs: List[Callable], optional
    :param config: Configuration supplying ``ncbi_taxon_id`` and the
        STRING / BIOGRID parameters. A default ``PPIGraphConfig()`` is
        created when omitted.
    :type config: PPIGraphConfig, optional
    :return: ``nx.Graph`` of the PPI network.
    :rtype: nx.Graph
    """
    # Fall back to the default configuration, then fold the STRING / BIOGRID
    # sub-configs into ``config.kwargs``.
    if config is None:
        config = PPIGraphConfig()
    config = parse_kwargs_from_config(config)

    # Graph-level attributes: the input proteins, an (initially empty) list
    # of sources, and the taxon taken from the config.
    graph = nx.Graph(
        protein_list=protein_list,
        sources=[],
        ncbi_taxon_id=config.ncbi_taxon_id,
    )

    # One node per protein, labelled with its own identifier.
    graph.add_nodes_from(protein_list)
    log.debug(f"Added {len(protein_list)} nodes to graph")
    protein_ids = {protein: protein for protein in protein_list}
    nx.set_node_attributes(graph, protein_ids, "protein_id")

    # Optional graph-level annotation.
    if graph_annotation_funcs is not None:
        graph = annotate_graph_metadata(graph, graph_annotation_funcs)

    # Optional node-level annotation.
    if node_annotation_funcs is not None:
        graph = annotate_node_metadata(graph, node_annotation_funcs)

    # Edge construction always runs.
    graph = compute_edges(graph, edge_construction_funcs)

    # Optional edge-level annotation; it runs after the edges exist.
    if edge_annotation_funcs is not None:
        graph = annotate_edge_metadata(graph, edge_annotation_funcs)

    return graph
if __name__ == "__main__":
    # Demo: build a PPI graph for a handful of proteins using both STRING and
    # BIOGRID edges, then draw it with edges coloured by their source.
    from functools import partial

    import matplotlib.pyplot as plt

    from graphein.ppi.edges import add_biogrid_edges, add_string_edges
    from graphein.ppi.features.node_features import add_sequence_to_nodes
    from graphein.protein.features.sequence.sequence import molecular_weight

    protein_list = [
        "CDC42",
        "CDK1",
        "KIF23",
        "PLK1",
        "RAC2",
        "RACGAP1",
        "RHOA",
        "RHOB",
    ]

    config = PPIGraphConfig()
    kwargs = config.kwargs

    g = compute_ppi_graph(
        protein_list=protein_list,
        edge_construction_funcs=[
            partial(add_string_edges, kwargs=kwargs),
            partial(add_biogrid_edges, kwargs=kwargs),
        ],
        node_annotation_funcs=[add_sequence_to_nodes, molecular_weight],
    )

    def _color_for(kind):
        """Return the colour for an edge whose ``kind`` is a single known source, else "y"."""
        for source, color in EDGE_COLOR_MAPPING.items():
            if kind == {source}:
                return color
        # Any other ``kind`` value (e.g. more than one source) is drawn yellow.
        return "y"

    edge_colors = [_color_for(g[u][v]["kind"]) for u, v in g.edges()]

    print(g.nodes())
    # ``nx.info`` was removed in NetworkX 3.0; print an equivalent summary
    # that works on every NetworkX version.
    print(
        f"Graph with {g.number_of_nodes()} nodes "
        f"and {g.number_of_edges()} edges"
    )
    nx.draw(g, with_labels=True, edge_color=edge_colors)
    plt.show()