Source code for graphein.rna.edges

"""Functions to compute edges for an RNA secondary structure graph."""
# %%
# Graphein
# Author: Arian Jamasb <arian@jamasb.io>, Emmanuele Rossi, Eric Ma
# License: MIT
# Project Website: https://github.com/a-r-j/graphein
# Code Repository: https://github.com/a-r-j/graphein
import logging

import networkx as nx

from graphein.rna.constants import (
    CANONICAL_BASE_PAIRINGS,
    PSEUDOKNOT_CLOSING_SYMBOLS,
    PSEUDOKNOT_OPENING_SYMBOLS,
    SIMPLE_DOTBRACKET_NOTATION,
    WOBBLE_BASE_PAIRINGS,
)

log = logging.getLogger(__name__)


def check_base_pairing_type(base_1: str, base_2: str) -> str:
    """
    Checks type and validity of base pairing interactions.

    ``base_1`` is looked up in ``CANONICAL_BASE_PAIRINGS`` and then in
    ``WOBBLE_BASE_PAIRINGS``. Each table is consulted independently, so a
    base that is missing from the canonical table can still be matched by
    the wobble table. Any pair matched by neither table is ``"invalid"``;
    the function always returns one of the three strings, never ``None``.

    :param base_1: RNA base letter for base 1.
    :type base_1: str
    :param base_2: RNA base letter for base 2.
    :type base_2: str
    :return: string referencing the type of base pairing ``"canonical"``,
        ``"wobble"`` or ``"invalid"``.
    :rtype: str
    """
    # ``.get`` with an empty default treats an unknown ``base_1`` as having
    # no partners in that table instead of aborting the whole check.
    if base_2 in CANONICAL_BASE_PAIRINGS.get(base_1, ()):
        return "canonical"
    if base_2 in WOBBLE_BASE_PAIRINGS.get(base_1, ()):
        return "wobble"
    # Known base_1 with a non-matching partner used to fall through and
    # return None; report it explicitly as documented.
    return "invalid"
def add_phosphodiester_bonds(G: nx.Graph) -> nx.Graph:
    """
    Adds phosphodiester bonds between adjacent nucleotides to an RNA secondary
    structure graph.

    Every position ``i > 0`` of ``G.graph["dotbracket"]`` is connected to
    position ``i - 1`` by an edge with ``attr="phosphodiester_bond"`` and
    ``color="b"``. ``G`` is modified in place and also returned.

    :param G: RNA Graph to add edges to.
    :type G: nx.Graph
    :return: RNA graph with ``phosphodiester_bond`` edges added.
    :rtype: nx.Graph
    """
    # Only the positions matter here, not the structure symbols, so iterate
    # over indices starting at the second nucleotide.
    for i in range(1, len(G.graph["dotbracket"])):
        G.add_edge(i, i - 1, attr="phosphodiester_bond", color="b")
    log.debug("Added phosphodiester bonds as edges")
    return G
def add_base_pairing_interactions(G: nx.Graph) -> nx.Graph:
    """
    Adds base pairing interactions between nucleotides to an RNA secondary
    structure graph.

    Each ``)`` in ``G.graph["dotbracket"]`` is paired with the most recent
    unmatched ``(`` and an edge with ``attr="base_pairing"`` and
    ``color="r"`` is added between the two positions. When the graph carries
    a ``"sequence"`` entry, the edge's ``pairing_type`` is computed from the
    ``"nucleotide"`` attribute of both nodes; otherwise it is ``"unknown"``.
    The symbols ``.``, ``[``, ``]``, ``{``, ``}``, ``<`` and ``>`` are
    skipped. ``G`` is modified in place and also returned.

    :param G: RNA Graph to add edges to.
    :type G: nx.Graph
    :raises ValueError: if ``dotbracket`` contains an unsupported character
        or a ``)`` that has no matching ``(``.
    :return: RNA graph with ``base_pairing`` edges added.
    :rtype: nx.Graph
    """
    # Pairing types can only be classified when nucleotides are known.
    check_base_pairing = "sequence" in G.graph

    # Symbols that are valid notation but irrelevant to plain base pairs.
    skipped_symbols = (".", "[", "]", "{", "}", "<", ">")

    # Stack of positions of "(" that have not been closed yet.
    unmatched_openings = []
    for i, c in enumerate(G.graph["dotbracket"]):
        if c == "(":
            unmatched_openings.append(i)
        elif c == ")":
            if not unmatched_openings:
                # Report malformed input as ValueError instead of letting
                # pop() on an empty list surface as a bare IndexError.
                raise ValueError(
                    f"Unbalanced dot-bracket notation: unmatched {c!r} "
                    f"at position {i}."
                )
            neighbor = unmatched_openings.pop()
            if check_base_pairing:
                pairing_type = check_base_pairing_type(
                    G.nodes[i]["nucleotide"], G.nodes[neighbor]["nucleotide"]
                )
            else:
                pairing_type = "unknown"
            G.add_edge(
                i,
                neighbor,
                attr="base_pairing",
                pairing_type=pairing_type,
                color="r",
            )
        elif c in skipped_symbols:
            continue
        else:
            raise ValueError("Input is not in dot-bracket notation!")
    log.debug("Added base_pairing interactions as edges")
    return G
def add_pseudoknots(G: nx.Graph) -> nx.Graph:
    """
    Adds pseudoknot interactions between nucleotides to an RNA secondary
    structure graph.

    Three bracket families are handled: ``[ ]``, ``{ }`` and ``< >``. Each
    closing symbol is paired with the most recent unmatched opening symbol of
    the same family and an edge with ``attr="pseudoknot"`` and ``color="g"``
    is added between the two positions. When the graph carries a
    ``"sequence"`` entry, the edge's ``pairing_type`` is computed from the
    ``"nucleotide"`` attribute of both nodes; otherwise it is ``"unknown"``.
    Symbols in ``SIMPLE_DOTBRACKET_NOTATION`` are skipped. ``G`` is modified
    in place and also returned.

    :param G: RNA Graph to add edges to.
    :type G: nx.Graph
    :raises ValueError: if ``dotbracket`` contains an unsupported character
        or a closing pseudoknot symbol that has no matching opening symbol.
    :return: RNA graph with pseudoknot edges added.
    :rtype: nx.Graph
    """
    # Pairing types can only be classified when nucleotides are known.
    check_base_pairing = "sequence" in G.graph

    # One stack of unmatched opening positions per supported bracket family,
    # plus the map from each closing symbol to its family.
    open_positions = {"[": [], "{": [], "<": []}
    opening_for = {"]": "[", "}": "{", ">": "<"}

    for i, c in enumerate(G.graph["dotbracket"]):
        if c in PSEUDOKNOT_OPENING_SYMBOLS:
            # Opening symbols outside the three families are ignored.
            if c in open_positions:
                open_positions[c].append(i)
        elif c in PSEUDOKNOT_CLOSING_SYMBOLS:
            if c not in opening_for:
                # Mirror the treatment of unhandled opening symbols so a
                # partner left over from an earlier iteration is never reused.
                continue
            stack = open_positions[opening_for[c]]
            if not stack:
                # Report malformed input as ValueError instead of letting
                # pop() on an empty list surface as a bare IndexError.
                raise ValueError(
                    f"Unbalanced dot-bracket notation: unmatched {c!r} "
                    f"at position {i}."
                )
            neighbor = stack.pop()
            if check_base_pairing:
                pairing_type = check_base_pairing_type(
                    G.nodes[i]["nucleotide"], G.nodes[neighbor]["nucleotide"]
                )
            else:
                pairing_type = "unknown"
            G.add_edge(
                i,
                neighbor,
                attr="pseudoknot",
                pairing_type=pairing_type,
                color="g",
            )
        elif c in SIMPLE_DOTBRACKET_NOTATION:
            continue
        else:
            raise ValueError("Input is not in dot-bracket notation!")
    log.debug("Added pseudoknot interactions as edges")
    return G
def add_all_dotbracket_edges(G: nx.Graph) -> nx.Graph:
    """
    Adds every edge type derived from the dot-bracket string to an RNA
    secondary structure graph.

    The graph is passed, in order, through :func:`add_phosphodiester_bonds`,
    :func:`add_base_pairing_interactions` and :func:`add_pseudoknots`.

    :param G: RNA Graph to add edges to.
    :type G: nx.Graph
    :return: RNA graph with ``phosphodiester_bond``, ``base_pairing`` and
        ``pseudoknot`` edges added.
    :rtype: nx.Graph
    """
    edge_functions = (
        add_phosphodiester_bonds,
        add_base_pairing_interactions,
        add_pseudoknots,
    )
    # Each step receives the graph returned by the previous one.
    for add_edges in edge_functions:
        G = add_edges(G)
    return G