"""Functions to compute edges for an RNA secondary structure graph."""
# %%
# Graphein
# Author: Arian Jamasb <arian@jamasb.io>, Emmanuele Rossi, Eric Ma
# License: MIT
# Project Website: https://github.com/a-r-j/graphein
# Code Repository: https://github.com/a-r-j/graphein
import logging
import networkx as nx
from graphein.rna.constants import (
CANONICAL_BASE_PAIRINGS,
PSEUDOKNOT_CLOSING_SYMBOLS,
PSEUDOKNOT_OPENING_SYMBOLS,
SIMPLE_DOTBRACKET_NOTATION,
WOBBLE_BASE_PAIRINGS,
)
log = logging.getLogger(__name__)
def check_base_pairing_type(base_1: str, base_2: str) -> str:
    """
    Checks type and validity of base pairing interactions.

    ``base_1`` is looked up in ``CANONICAL_BASE_PAIRINGS`` first and then in
    ``WOBBLE_BASE_PAIRINGS``. A base that is absent from the canonical table is
    still checked against the wobble table, and any pair that matches neither
    table is reported as ``"invalid"`` (never ``None``).

    :param base_1: str RNA Base letter for base 1.
    :type base_1: str
    :param base_2: str RNA base letter for base 2.
    :type base_2: str
    :return: string referencing the type of base pairing ``"canonical"``, ``"wobble"`` or ``"invalid"``.
    :rtype: str
    """
    # NOTE(review): assumes both pairing tables are dict-like (support ``.get``);
    # they are imported from graphein.rna.constants -- confirm there.
    if base_2 in CANONICAL_BASE_PAIRINGS.get(base_1, ()):
        return "canonical"
    if base_2 in WOBBLE_BASE_PAIRINGS.get(base_1, ()):
        return "wobble"
    # Unknown base_1, or base_2 is not a listed partner of base_1.
    return "invalid"
def add_phosphodiester_bonds(G: nx.Graph) -> nx.Graph:
    """
    Adds phosphodiester bonds between adjacent nucleotides to an RNA secondary structure graph.

    One edge ``(i, i - 1)`` is added for every position ``i >= 1`` of
    ``G.graph["dotbracket"]``; each edge carries ``attr="phosphodiester_bond"``
    and ``color="b"``. The graph is modified in place and also returned.

    :param G: RNA Graph to add edges to. ``G.graph["dotbracket"]`` must be set.
    :type G: nx.Graph
    :return: RNA graph with ``phosphodiester_bond`` edges added.
    :rtype: nx.Graph
    """
    # Only the length of the dotbracket matters here: every position after
    # the first is linked to its predecessor.
    for i in range(1, len(G.graph["dotbracket"])):
        G.add_edge(i, i - 1, attr="phosphodiester_bond", color="b")
    log.debug("Added phosphodiester bonds as edges")
    return G
def add_base_pairing_interactions(G: nx.Graph) -> nx.Graph:
    """
    Adds base pairing interactions between nucleotides to an RNA secondary structure graph.

    Only ``(`` / ``)`` pairs become edges here. ``.`` and the pseudoknot
    brackets ``[`` ``]`` ``{`` ``}`` ``<`` ``>`` are accepted but skipped.
    Each edge carries ``attr="base_pairing"``, ``color="r"`` and a
    ``pairing_type``: the result of :func:`check_base_pairing_type` when
    ``G.graph`` has a ``"sequence"`` entry, otherwise ``"unknown"``.
    The graph is modified in place and also returned.

    :param G: RNA Graph to add edges to. ``G.graph["dotbracket"]`` must be set.
    :type G: nx.Graph
    :raises ValueError: if ``dotbracket`` contains an unsupported character,
        or a ``)`` that has no preceding unmatched ``(``.
    :return: RNA graph with ``base_pairing`` edges added.
    :rtype: nx.Graph
    """
    # Pairing types can only be classified when a sequence was supplied
    check_base_pairing = "sequence" in G.graph
    # Symbols that are valid in the notation but produce no edge here
    skipped_symbols = (".", "[", "]", "{", "}", "<", ">")
    # Stack of positions of "(" still waiting for their ")"
    open_positions = []
    for i, c in enumerate(G.graph["dotbracket"]):
        if c == "(":
            open_positions.append(i)
        elif c == ")":
            if not open_positions:
                # Report malformed input as the documented ValueError rather
                # than a bare IndexError from popping an empty list.
                raise ValueError(
                    f"Unbalanced dot-bracket notation: ')' at position {i} "
                    "has no matching '('."
                )
            neighbor = open_positions.pop()
            if check_base_pairing:
                pairing_type = check_base_pairing_type(
                    G.nodes[i]["nucleotide"], G.nodes[neighbor]["nucleotide"]
                )
            else:
                pairing_type = "unknown"
            G.add_edge(
                i,
                neighbor,
                attr="base_pairing",
                pairing_type=pairing_type,
                color="r",
            )
        elif c in skipped_symbols:
            continue
        else:
            raise ValueError("Input is not in dot-bracket notation!")
    if open_positions:
        # Unclosed "(" never produced an edge; surface that instead of
        # dropping them silently (kept non-fatal, as before).
        log.warning(
            "%d '(' in dotbracket have no matching ')'; no edge added for them",
            len(open_positions),
        )
    log.debug("Added base_pairing interactions as edges")
    return G
def add_pseudoknots(G: nx.Graph) -> nx.Graph:
    """
    Adds pseudoknot interactions between nucleotides to an RNA secondary structure graph.

    Three bracket families are paired independently: ``[`` ``]``, ``{`` ``}``
    and ``<`` ``>``. Each edge carries ``attr="pseudoknot"``, ``color="g"``
    and a ``pairing_type``: the result of :func:`check_base_pairing_type` when
    ``G.graph`` has a ``"sequence"`` entry, otherwise ``"unknown"``.
    Symbols in ``SIMPLE_DOTBRACKET_NOTATION`` are accepted but skipped.
    The graph is modified in place and also returned.

    :param G: RNA Graph to add edges to. ``G.graph["dotbracket"]`` must be set.
    :type G: nx.Graph
    :raises ValueError: if ``dotbracket`` contains an unsupported character,
        or a closing pseudoknot symbol with no preceding unmatched opener of
        the same family.
    :return: RNA graph with pseudoknot edges added.
    :rtype: nx.Graph
    """
    # Pairing types can only be classified when a sequence was supplied
    check_base_pairing = "sequence" in G.graph
    # One stack of unmatched opening positions per bracket family
    square_open = []  # for [[[]]] knots
    curly_open = []  # for {{{}}} knots
    angle_open = []  # for <<<>>> knots
    stack_for_opening = {"[": square_open, "{": curly_open, "<": angle_open}
    stack_for_closing = {"]": square_open, "}": curly_open, ">": angle_open}
    for i, c in enumerate(G.graph["dotbracket"]):
        if c in PSEUDOKNOT_OPENING_SYMBOLS:
            stack = stack_for_opening.get(c)
            if stack is not None:
                stack.append(i)
        elif c in PSEUDOKNOT_CLOSING_SYMBOLS:
            stack = stack_for_closing.get(c)
            if stack is None:
                # NOTE(review): PSEUDOKNOT_CLOSING_SYMBOLS is defined elsewhere.
                # If it lists a symbol outside the three families handled
                # here, skip it (as its opener is skipped above) instead of
                # pairing it with a stale index.
                continue
            if not stack:
                # Report malformed input as ValueError rather than a bare
                # IndexError from popping an empty list.
                raise ValueError(
                    f"Unbalanced dot-bracket notation: {c!r} at position {i} "
                    "has no matching opening symbol."
                )
            neighbor = stack.pop()
            if check_base_pairing:
                pairing_type = check_base_pairing_type(
                    G.nodes[i]["nucleotide"], G.nodes[neighbor]["nucleotide"]
                )
            else:
                pairing_type = "unknown"
            G.add_edge(
                i,
                neighbor,
                attr="pseudoknot",
                pairing_type=pairing_type,
                color="g",
            )
        elif c in SIMPLE_DOTBRACKET_NOTATION:
            continue
        else:
            raise ValueError("Input is not in dot-bracket notation!")
    unmatched = len(square_open) + len(curly_open) + len(angle_open)
    if unmatched:
        # Unclosed openers never produced an edge; surface that instead of
        # dropping them silently (kept non-fatal, as before).
        log.warning(
            "%d pseudoknot opening symbol(s) in dotbracket have no closing "
            "partner; no edge added for them",
            unmatched,
        )
    log.debug("Added pseudoknot interactions as edges")
    return G
def add_all_dotbracket_edges(G: nx.Graph) -> nx.Graph:
    """
    Adds every dot-bracket derived edge type to an RNA secondary structure graph.

    Runs, in order, :func:`add_phosphodiester_bonds`,
    :func:`add_base_pairing_interactions` and :func:`add_pseudoknots`,
    feeding the graph returned by each step into the next.

    :param G: RNA Graph to add edges to.
    :type G: nx.Graph
    :return: RNA graph with ``phosphodiester_bond``, ``base_pairing`` and ``pseudoknot`` edges added.
    :rtype: nx.Graph
    """
    edge_passes = (
        add_phosphodiester_bonds,
        add_base_pairing_interactions,
        add_pseudoknots,
    )
    for add_edges in edge_passes:
        G = add_edges(G)
    return G