Source code for timeawarepc.pcalg

"""A graph generator based on the PC algorithm [Kalisch2007] via https://github.com/keiichishima/pcalg
"""

from itertools import combinations, permutations
import logging
import numpy as np
import networkx as nx
from timeawarepc.pcalg_helpers import *

_logger = logging.getLogger(__name__)

def _create_complete_graph(node_ids):
    """Build an undirected graph in which every pair of nodes is connected.

    Args:
        node_ids: a list of node ids

    Returns:
        A complete undirected graph over ``node_ids`` (as a networkx.Graph)
    """
    complete = nx.Graph()
    # Register the nodes first so that an input with fewer than two ids
    # still yields a graph containing those (isolated) nodes.
    complete.add_nodes_from(node_ids)
    # Every unordered pair of distinct nodes becomes one edge.
    complete.add_edges_from(combinations(node_ids, 2))
    return complete

def estimate_skeleton(indep_test_func, data_matrix, alpha, **kwargs):
    """Estimate a skeleton graph from the statistical information.

    Starting from a complete graph (or ``init_graph`` when given), edges
    between nodes i and j are removed whenever the conditional independence
    test returns a p-value greater than ``alpha`` for some conditioning
    subset of the other neighbors of i. The size of the conditioning subset
    grows by one on every pass.

    Args:
        indep_test_func: the conditional independence test, called as
            ``indep_test_func(data_matrix, i, j, cond_set, **kwargs)`` and
            expected to return a p-value.
        data_matrix: data (as a numpy array); one column per node.
        alpha: the significance level.
        kwargs:
            'max_reach': maximum size of the conditioning subset. The
                value depends on the underlying distribution.
            'method': if 'stable' given, use stable-PC algorithm
                (see [Colombo2014]).
            'init_graph': initial structure of skeleton graph
                (as a networkx.Graph). If not specified, a complete
                graph is used. NOTE: the given graph is modified in
                place and is the graph object that is returned.
            other parameters may be passed depending on the
                indep_test_func()s. All kwargs (including the ones
                above) are forwarded to indep_test_func.

    Returns:
        g: a skeleton graph (as a networkx.Graph).
        sep_set: a separation set (as a 2D list of set()). Entries for
            node pairs that are not adjacent in 'init_graph' are None.

    Raises:
        ValueError: if 'init_graph' is not a networkx.Graph or its number
            of nodes does not match the number of columns of data_matrix.

    [Colombo2014] Diego Colombo and Marloes H Maathuis. Order-independent
    constraint-based causal structure learning. In The Journal of Machine
    Learning Research, Vol. 15, pp. 3741-3782, 2014.
    """
    # The method does not change during the run, so evaluate it once.
    use_stable = kwargs.get('method') == "stable"

    node_size = data_matrix.shape[1]
    node_ids = range(node_size)
    sep_set = [[set() for _ in range(node_size)] for _ in range(node_size)]

    if 'init_graph' in kwargs:
        g = kwargs['init_graph']
        if not isinstance(g, nx.Graph):
            raise ValueError('init_graph must be a networkx.Graph')
        if g.number_of_nodes() != node_size:
            raise ValueError('init_graph not matching data_matrix shape')
        # Pairs that are already non-adjacent have no separation set.
        for (i, j) in combinations(node_ids, 2):
            if not g.has_edge(i, j):
                sep_set[i][j] = None
                sep_set[j][i] = None
    else:
        g = _create_complete_graph(node_ids)

    cond_size = 0  # size of the conditioning subsets tested in this pass
    while True:
        cont = False
        remove_edges = []
        for (i, j) in permutations(node_ids, 2):
            adj_i = list(g.neighbors(i))
            if j not in adj_i:
                continue
            adj_i.remove(j)
            if len(adj_i) < cond_size:
                # Not enough other neighbors to form a subset of this size.
                continue

            _logger.debug('testing %s and %s', i, j)
            _logger.debug('neighbors of %s are %s', i, adj_i)
            for k in combinations(adj_i, cond_size):
                _logger.debug('indep prob of %s and %s with subset %s',
                              i, j, k)
                p_val = indep_test_func(data_matrix, i, j, set(k), **kwargs)
                _logger.debug('p_val is %s', p_val)
                if p_val > alpha:
                    if g.has_edge(i, j):
                        _logger.debug('p: remove edge (%s, %s)', i, j)
                        if use_stable:
                            # Stable-PC: defer removals to the end of the
                            # pass so the result is order-independent.
                            remove_edges.append((i, j))
                        else:
                            g.remove_edge(i, j)
                    sep_set[i][j] |= set(k)
                    sep_set[j][i] |= set(k)
                    break
            # At least one pair still had enough neighbors to be tested,
            # so a pass with larger subsets may be needed.
            cont = True

        cond_size += 1
        if use_stable:
            g.remove_edges_from(remove_edges)
        if not cont:
            break
        if ('max_reach' in kwargs) and (cond_size > kwargs['max_reach']):
            break

    return (g, sep_set)

def estimate_cpdag(skel_graph, sep_set):
    """Estimate a CPDAG from the skeleton graph and separation sets
    returned by the estimate_skeleton() function.

    Args:
        skel_graph: A skeleton graph (an undirected networkx.Graph).
        sep_set: A 2D-array of separation sets, indexed by node id.
            The contents look something like below.
                sep_set[i][j] = set([k, l, m])
            An entry may be None, in which case the pair is skipped
            when orienting v-structures.

    Returns:
        An estimated CPDAG (the directed version of ``skel_graph`` with
        edges removed). An edge that is still present in both directions
        represents an undirected (unoriented) edge.
    """
    def _has_both_edges(dag, i, j):
        return dag.has_edge(i, j) and dag.has_edge(j, i)

    def _has_any_edge(dag, i, j):
        return dag.has_edge(i, j) or dag.has_edge(j, i)

    dag = skel_graph.to_directed()
    node_ids = skel_graph.nodes()

    # Orient v-structures: for nonadjacent i and j with a common neighbor
    # k that is not in their separation set, make i->k<-j by dropping the
    # edges k->i and k->j.
    for (i, j) in combinations(node_ids, 2):
        adj_i = set(dag.successors(i))
        if j in adj_i:
            continue
        adj_j = set(dag.successors(j))
        if i in adj_j:
            continue
        if sep_set[i][j] is None:
            continue
        for k in adj_i & adj_j:
            if k in sep_set[i][j]:
                continue
            if dag.has_edge(k, i):
                _logger.debug('S: remove edge (%s, %s)', k, i)
                dag.remove_edge(k, i)
            if dag.has_edge(k, j):
                _logger.debug('S: remove edge (%s, %s)', k, j)
                dag.remove_edge(k, j)

    # Apply the orientation rules until a full pass changes nothing.
    # The rules only ever remove edges, so an unchanged edge count means
    # an unchanged graph.
    while True:
        edges_before = dag.number_of_edges()
        # Each rule orients i-j into i->j, which is direction-sensitive,
        # so every ordered pair (i, j) must be considered.
        for (i, j) in permutations(node_ids, 2):
            # Rule 1: Orient i-j into i->j whenever there is an arrow k->i
            # such that k and j are nonadjacent.
            if _has_both_edges(dag, i, j):
                # Snapshot the predecessors; the graph is edited below.
                for k in list(dag.predecessors(i)):
                    # Skip if there is an arrow i->k (k-i is undirected).
                    if dag.has_edge(i, k):
                        continue
                    # Skip if k and j are adjacent.
                    if _has_any_edge(dag, k, j):
                        continue
                    # Make i-j into i->j.
                    _logger.debug('R1: remove edge (%s, %s)', j, i)
                    dag.remove_edge(j, i)
                    break

            # Rule 2: Orient i-j into i->j whenever there is a chain
            # i->k->j.
            if _has_both_edges(dag, i, j):
                # Nodes k where i->k.
                succs_i = {k for k in dag.successors(i)
                           if not dag.has_edge(k, i)}
                # Nodes k where k->j.
                preds_j = {k for k in dag.predecessors(j)
                           if not dag.has_edge(j, k)}
                # Check if there is any node k where i->k->j.
                if succs_i & preds_j:
                    # Make i-j into i->j.
                    _logger.debug('R2: remove edge (%s, %s)', j, i)
                    dag.remove_edge(j, i)

            # Rule 3: Orient i-j into i->j whenever there are two chains
            # i-k->j and i-l->j such that k and l are nonadjacent.
            if _has_both_edges(dag, i, j):
                # Nodes k where i-k.
                adj_i = {k for k in dag.successors(i) if dag.has_edge(k, i)}
                for (k, l) in combinations(adj_i, 2):
                    # Skip if k and l are adjacent.
                    if _has_any_edge(dag, k, l):
                        continue
                    # Skip if not k->j.
                    if dag.has_edge(j, k) or (not dag.has_edge(k, j)):
                        continue
                    # Skip if not l->j.
                    if dag.has_edge(j, l) or (not dag.has_edge(l, j)):
                        continue
                    # Make i-j into i->j.
                    _logger.debug('R3: remove edge (%s, %s)', j, i)
                    dag.remove_edge(j, i)
                    break

            # Rule 4: Orient i-j into i->j whenever there are two chains
            # i-k->l and k->l->j such that k and j are nonadjacent.
            #
            # However, this rule is not necessary when the PC-algorithm
            # is used to estimate a DAG.

        if dag.number_of_edges() == edges_before:
            break

    return dag