Source code for cdt.utils.graph

"""Utilities for graph not included in Networkx.

.. MIT License
..
.. Copyright (c) 2018 Diviyan Kalainathan
..
.. Permission is hereby granted, free of charge, to any person obtaining a copy
.. of this software and associated documentation files (the "Software"), to deal
.. in the Software without restriction, including without limitation the rights
.. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
.. copies of the Software, and to permit persons to whom the Software is
.. furnished to do so, subject to the following conditions:
..
.. The above copyright notice and this permission notice shall be included in all
.. copies or substantial portions of the Software.
..
.. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
.. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
.. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
.. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
.. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
.. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
.. SOFTWARE.
"""

import networkx as nx
from copy import deepcopy
import operator
import numpy as np
import scipy.stats.mstats as stat
from numpy import linalg as LA


[docs]def network_deconvolution(mat, **kwargs): """Python implementation/translation of network deconvolution by MIT-KELLIS LAB. .. note:: For networkx graphs, use the cdt.utils.graph.remove_indirect_links function code author:gidonro [Github username](https://github.com/gidonro/Network-Deconvolution) LICENSE: MIT-KELLIS LAB AUTHORS: Algorithm was programmed by Soheil Feizi. Paper authors are S. Feizi, D. Marbach, M. M?©dard and M. Kellis Python implementation: Gideon Rosenthal For more details, see the following paper: Network Deconvolution as a General Method to Distinguish Direct Dependencies over Networks By: Soheil Feizi, Daniel Marbach, Muriel Médard and Manolis Kellis Nature Biotechnology Args: mat (numpy.ndarray): matrix, if it is a square matrix, the program assumes it is a relevance matrix where mat(i,j) represents the similarity content between nodes i and j. Elements of matrix should be non-negative. beta (float): Scaling parameter, the program maps the largest absolute eigenvalue of the direct dependency matrix to beta. It should be between 0 and 1. alpha (float): fraction of edges of the observed dependency matrix to be kept in deconvolution process. control (int): if 0, displaying direct weights for observed interactions, if 1, displaying direct weights for both observed and non-observed interactions. Returns: numpy.ndarray: Output deconvolved matrix (direct dependency matrix). Its components represent direct edge weights of observed interactions. Choosing top direct interactions (a cut-off) depends on the application and is not implemented in this code. Example: >>> from cdt.utils.graph import network_deconvolution >>> import networkx as nx >>> # Generate sample data >>> from cdt.data import AcyclicGraphGenerator >>> graph = AcyclicGraphGenerator(linear).generate()[1] >>> adj_mat = nx.adjacency_matrix(graph).todense() >>> output = network_deconvolution(adj_mat) .. note:: To apply ND on regulatory networks, follow steps explained in Supplementary notes 1.4.1 and 2.1 and 2.3 of the paper. In this implementation, input matrices are made symmetric. """ alpha = kwargs.get('alpha', 1) beta = kwargs.get('beta', 0.99) control = kwargs.get('control', 0) # ToDO : ASSERTS try: assert beta < 1 or beta > 0 assert alpha <= 1 or alpha > 0 except AssertionError: raise ValueError("alpha must be in ]0, 1] and beta in [0, 1]") # Processing the input matrix, diagonal values are filtered np.fill_diagonal(mat, 0) # Thresholding the input matrix y = stat.mquantiles(mat[:], prob=[1 - alpha]) th = mat >= y mat_th = mat * th # Making the matrix symetric if already not mat_th = (mat_th + mat_th.T) / 2 # Eigen decomposition Dv, U = LA.eigh(mat_th) D = np.diag((Dv)) lam_n = np.abs(np.min(np.min(np.diag(D)), 0)) lam_p = np.abs(np.max(np.max(np.diag(D)), 0)) m1 = lam_p * (1 - beta) / beta m2 = lam_n * (1 + beta) / beta m = max(m1, m2) # network deconvolution for i in range(D.shape[0]): D[i, i] = (D[i, i]) / (m + D[i, i]) mat_new1 = np.dot(U, np.dot(D, LA.inv(U))) # Displying direct weights if control == 0: ind_edges = (mat_th > 0) * 1.0 ind_nonedges = (mat_th == 0) * 1.0 m1 = np.max(np.max(mat * ind_nonedges)) m2 = np.min(np.min(mat_new1)) mat_new2 = (mat_new1 + np.max(m1 - m2, 0)) * ind_edges + (mat * ind_nonedges) else: m2 = np.min(np.min(mat_new1)) mat_new2 = (mat_new1 + np.max(-m2, 0)) # linearly mapping the deconvolved matrix to be between 0 and 1 m1 = np.min(np.min(mat_new2)) m2 = np.max(np.max(mat_new2)) mat_nd = (mat_new2 - m1) / (m2 - m1) return mat_nd
[docs]def clr(M, **kwargs): """Implementation of the Context Likelihood or Relatedness Network algorithm. .. note:: For networkx graphs, use the cdt.utils.graph.remove_indirect_links function Args: mat (numpy.ndarray): matrix, if it is a square matrix, the program assumes it is a relevance matrix where mat(i,j) represents the similarity content between nodes i and j. Elements of matrix should be non-negative. Returns: numpy.ndarray: Output deconvolved matrix (direct dependency matrix). Its components represent direct edge weights of observed interactions. Example: >>> from cdt.utils.graph import clr >>> import networkx as nx >>> # Generate sample data >>> from cdt.data import AcyclicGraphGenerator >>> graph = AcyclicGraphGenerator(linear).generate()[1] >>> adj_mat = nx.adjacency_matrix(graph).todense() >>> output = clr(adj_mat) .. note:: Ref:Jeremiah J. Faith, Boris Hayete, Joshua T. Thaden, Ilaria Mogno, Jamey Wierzbowski, Guillaume Cottarel, Simon Kasif, James J. Collins, and Timothy S. Gardner. Large-scale mapping and validation of escherichia coli transcriptional regulation from a compendium of expression profiles. PLoS Biology, 2007 """ R = np.zeros(M.shape) Id = [[0, 0] for i in range(M.shape[0])] for i in range(M.shape[0]): mu_i = np.mean(M[i, :]) sigma_i = np.std(M[i, :]) Id[i] = [mu_i, sigma_i] for i in range(M.shape[0]): for j in range(i + 1, M.shape[0]): z_i = np.max([0, (M[i, j] - Id[i][0]) / Id[i][0]]) z_j = np.max([0, (M[i, j] - Id[j][0]) / Id[j][0]]) R[i, j] = np.sqrt(z_i**2 + z_j**2) R[j, i] = R[i, j] # Symmetric return R
[docs]def aracne(m, **kwargs): """Implementation of the ARACNE algorithm. .. note:: For networkx graphs, use the cdt.utils.graph.remove_indirect_links function Args: mat (numpy.ndarray): matrix, if it is a square matrix, the program assumes it is a relevance matrix where mat(i,j) represents the similarity content between nodes i and j. Elements of matrix should be non-negative. Returns: numpy.ndarray: Output deconvolved matrix (direct dependency matrix). Its components represent direct edge weights of observed interactions. Example: >>> from cdt.utils.graph import aracne >>> import networkx as nx >>> # Generate sample data >>> from cdt.data import AcyclicGraphGenerator >>> graph = AcyclicGraphGenerator(linear).generate()[1] >>> adj_mat = nx.adjacency_matrix(graph).todense() >>> output = aracne(adj_mat) .. note:: Ref: ARACNE: An Algorithm for the Reconstruction of Gene Regulatory Networks in a Mammalian Cellular Context Adam A Margolin, Ilya Nemenman, Katia Basso, Chris Wiggins, Gustavo Stolovitzky, Riccardo Dalla Favera and Andrea Califano DOI: https://doi.org/10.1186/1471-2105-7-S1-S7 """ I0 = kwargs.get('I0', 0.0) # No default thresholding W0 = kwargs.get('W0', 0.05) # thresholding m = np.where(m > I0, m, 0) # Finding triplets and filtering them for i in range(m.shape[0]-2): for j in range(i+1, m.shape[0]-1): for k in range(j+1, m.shape[0]): triplet = [m[i, j], m[j, k], m[i, k]] min_index, min_value = min(enumerate(triplet), key=operator.itemgetter(1)) if 0 < min_value < W0: if min_index == 0: m[i, j] = m[j, i] = 0. elif min_index == 1: m[j, k] = m[k, j] = 0. else: m[i, k] = m[k, i] = 0. return m
[docs]def dagify_min_edge(g): """Input a graph and output a DAG. The heuristic is to reverse the edge with the lowest score of the cycle if possible, else remove it. Args: g (networkx.DiGraph): Graph to modify to output a DAG Returns: networkx.DiGraph: DAG made out of the input graph. Example: >>> from cdt.utils.graph import dagify_min_edge >>> import networkx as nx >>> import numpy as np >>> # Generate sample data >>> graph = nx.DiGraph((np.ones(4) - np.eye(4)) * np.random.uniform(size=(4,4))) >>> output = dagify_min_edge(graph) """ ncycles = len(list(nx.simple_cycles(g))) while not nx.is_directed_acyclic_graph(g): cycle = next(nx.simple_cycles(g)) edges = [(cycle[-1], cycle[0])] scores = [(g[cycle[-1]][cycle[0]]['weight'])] for i, j in zip(cycle[:-1], cycle[1:]): edges.append((i, j)) scores.append(g[i][j]['weight']) i, j = edges[scores.index(min(scores))] gc = deepcopy(g) gc.remove_edge(i, j) gc.add_edge(j, i) ngc = len(list(nx.simple_cycles(gc))) if ngc < ncycles: g.add_edge(j, i, weight=min(scores)) g.remove_edge(i, j) ncycles = ngc return g