# Source code for genewalk.null_distributions

"""This module implements functions related to the construction of a null
distribution for GeneWalk networks."""
import logging
import networkx as nx

# Module-level logger.
# NOTE(review): the logger name 'genewalk.get_null_distributions' differs from
# the module path given in the header (genewalk.null_distributions) -- confirm
# whether this is intentional before relying on per-module logging config.
logger = logging.getLogger('genewalk.get_null_distributions')


def get_rand_graph(mg):
    """Build a randomized graph that keeps the degree sequence of the input.

    Parameters
    ----------
    mg : networkx.MultiGraph
        The graph whose node degrees serve as the template for the
        random graph.

    Returns
    -------
    networkx.MultiGraph
        A random graph generated with the configuration model from the
        degree sequence of the input graph. Node labels are strings made
        of the letter 'n' followed by the label assigned by the
        configuration model (e.g. 0 becomes 'n0').
    """
    # Gather the node degrees and order them from largest to smallest;
    # this step is deterministic, the randomness comes from the model below.
    degrees = [mg.degree(node) for node in mg.nodes()]
    degrees.sort(reverse=True)

    # Random multigraph realizing exactly this degree sequence.
    rand_graph = nx.configuration_model(degrees)

    # Numeric node labels cause problems in word2vec, so turn every label
    # into a string with an 'n' prefix.
    relabeling = {}
    for label in rand_graph.nodes():
        relabeling[label] = 'n%s' % label

    # copy=False relabels the graph in place.
    return nx.relabel_nodes(rand_graph, relabeling, copy=False)
def get_null_distributions(rg, nv):
    """Collect similarities between each node and its neighbors.

    For every node of the (randomized) input graph, the similarity
    ``1 - distance`` between the node's vector and the vectors of its
    neighbors is computed, and all values are pooled into one list.

    Parameters
    ----------
    rg : graph
        The randomized graph to walk over. Presumably a
        networkx.MultiGraph such as the one returned by
        get_rand_graph -- TODO confirm against callers.
    nv : object
        Node vectors exposing ``distances(node, other_words=...)``.
        Presumably a gensim KeyedVectors instance -- TODO confirm.

    Returns
    -------
    list
        Similarity values for all (node, neighbor) pairs, excluding a
        node's connection to itself.
    """
    similarities = []
    for source in nx.nodes(rg):
        neighbors = list(rg[source])
        # Self connections do not contribute to the null distribution.
        if source in neighbors:
            neighbors.remove(source)
        # Nodes without any remaining neighbor yield no similarity values.
        if not neighbors:
            continue
        similarities.extend(1 - nv.distances(source, other_words=neighbors))
    return similarities