From 1c1205b7b3ee59a68533df7f8ff2aeb70454a090 Mon Sep 17 00:00:00 2001 From: Joseph Crail Date: Wed, 5 Jul 2017 11:11:32 -0400 Subject: [PATCH 1/7] Add initial graph layout algorithm --- datashader/layout.py | 145 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 datashader/layout.py diff --git a/datashader/layout.py b/datashader/layout.py new file mode 100644 index 000000000..260db8ab7 --- /dev/null +++ b/datashader/layout.py @@ -0,0 +1,145 @@ +"""Assign coordinates to the nodes of a graph. + +Timothée Poisot's `nxfa2` is the original implementation of the main +algorithm. + +.. _nxfa2: + https://github.com/tpoisot/nxfa2 +""" + +from __future__ import absolute_import, division, print_function + +import numpy as np +import pandas as pd +import param +import scipy as sp + + +def _convert_graph_with_positions_to_dataframes(graph, pos): + """ + Convert NetworkX graph with associated positions into two dataframes. + + In a NetworkX graph, each edge can have its own independent attributes. One + edge can have a different set of attributes than another edge. This means + we have to assign a default weight value when converting to dataframes. + """ + nodes = pd.DataFrame() + for node, xy in zip(graph, pos): + nodes = nodes.append({'id': node, 'x': xy[0], 'y': xy[1]}, ignore_index=True) + + nodes['id'].astype(np.int32) + nodes = nodes.set_index('id') + + edges = pd.DataFrame() + for edge in graph.edges(): + edge_attributes = graph[edge[0]][edge[1]] + if 'weight' in edge_attributes: + weight = edge_attributes['weight'] + else: + weight = 1 + edges = edges.append({'source': edge[0], 'target': edge[1], 'weight': weight}, ignore_index=True) + + edges['source'].astype(np.int32) + edges['target'].astype(np.int32) + + return nodes, edges + + +class forceatlas2_layout(param.ParameterizedFunction): + """ + Assign coordinates to the nodes of a graph. + + This is a force-directed graph layout algorithm. + + .. _ForceAtlas2: + http://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0098679&type=printable + """ + + def __call__(self, graph, iterations=10, linlog=False, pos=None, nohubs=False, k=None, dim=2): + """ + Parameters + ---------- + graph : networkx.Graph + The NetworkX graph to layout + iterations : int + Number of iterations + linlog : bool + Whether to use logarithmic attraction force + pos : ndarray + Initial positions for the given nodes + nohubs : bool + Whether to grant authorities (nodes with a high indegree) a + more central position than hubs (nodes with a high outdegree) + k : float + Compensates for the repulsion for nodes that are far away + from the center. Defaults to the inverse of the number of + nodes. + dim : int + Coordinate dimensions of each node. + + Returns + ------- + nodes, edges : pandas.DataFrame + """ + try: + import networkx as nx + except ImportError: + raise ImportError('install networkx to use this feature') + + # This comes from the sparse FR layout in NetworkX + A = nx.to_scipy_sparse_matrix(graph, dtype='f') + nnodes, _ = A.shape + + try: + A = A.tolil() + except Exception: + A = (sp.sparse.coo_matrix(A)).tolil() + if pos is None: + pos = np.asarray(np.random.random((nnodes, dim)), dtype=A.dtype) + else: + pos = pos.astype(A.dtype) + if k is None: + k = np.sqrt(1.0 / nnodes) + + # the initial "temperature" is about .1 of domain area (=1x1) + # this is the largest step allowed in the dynamics. + t = 0.1 + + # simple cooling scheme. + # linearly step down by dt on each iteration so last iteration is size dt. + dt = t / float(iterations + 1) + displacement = np.zeros((dim, nnodes)) + for iteration in range(iterations): + displacement *= 0 + for i in range(A.shape[0]): + # difference between this row's node position and all others + delta = (pos[i] - pos).T + + # distance between points + distance = np.sqrt((delta ** 2).sum(axis=0)) + + # enforce minimum distance of 0.01 + distance = np.where(distance < 0.01, 0.01, distance) + + # the adjacency matrix row + ai = np.asarray(A.getrowview(i).toarray()) + + # displacement "force" + dist = k * k / distance ** 2 + + if nohubs: + dist = dist / float(ai.sum(axis=1) + 1) + if linlog: + dist = np.log(dist + 1) + displacement[:, i] += (delta * (dist - ai * distance / k)).sum(axis=1) + + # update positions + length = np.sqrt((displacement ** 2).sum(axis=0)) + length = np.where(length < 0.01, 0.01, length) + pos += (displacement * t / length).T + + # cool temperature + t -= dt + + # Return the layout + return _convert_graph_with_positions_to_dataframes(graph, pos) From 592a8e1775ca16978e26141a9fecb8b3d326409a Mon Sep 17 00:00:00 2001 From: Joseph Crail Date: Wed, 5 Jul 2017 11:39:53 -0400 Subject: [PATCH 2/7] Remove unicode character --- datashader/layout.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datashader/layout.py b/datashader/layout.py index 260db8ab7..b270dde75 100644 --- a/datashader/layout.py +++ b/datashader/layout.py @@ -1,6 +1,6 @@ """Assign coordinates to the nodes of a graph. -Timothée Poisot's `nxfa2` is the original implementation of the main +Timothee Poisot's `nxfa2` is the original implementation of the main algorithm. .. _nxfa2: From 6a791a8b05d133b7f749b39eafa46169890fb110 Mon Sep 17 00:00:00 2001 From: Joseph Crail Date: Sat, 8 Jul 2017 22:16:44 -0400 Subject: [PATCH 3/7] Refactor to use dataframes for input We removed the NetworkX requirement and can now take two dataframes (nodes and edges) as input, like the bundling module. --- datashader/layout.py | 86 ++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 51 deletions(-) diff --git a/datashader/layout.py b/datashader/layout.py index b270dde75..55c32b445 100644 --- a/datashader/layout.py +++ b/datashader/layout.py @@ -15,34 +15,30 @@ import scipy as sp -def _convert_graph_with_positions_to_dataframes(graph, pos): - """ - Convert NetworkX graph with associated positions into two dataframes. +def _extract_points_from_nodes(nodes): + if 'x' in nodes.columns and 'y' in nodes.columns: + points = np.asarray(nodes[['x', 'y']]) + else: + points = np.asarray(np.random.random((len(nodes), 2))) + return points - In a NetworkX graph, each edge can have its own independent attributes. One - edge can have a different set of attributes than another edge. This means - we have to assign a default weight value when converting to dataframes. - """ - nodes = pd.DataFrame() - for node, xy in zip(graph, pos): - nodes = nodes.append({'id': node, 'x': xy[0], 'y': xy[1]}, ignore_index=True) - nodes['id'].astype(np.int32) - nodes = nodes.set_index('id') +def _convert_edges_to_sparse_matrix(edges): + nedges = len(edges) + + if 'weight' in edges: + weights = edges['weights'] + else: + weights = np.ones(nedges) - edges = pd.DataFrame() - for edge in graph.edges(): - edge_attributes = graph[edge[0]][edge[1]] - if 'weight' in edge_attributes: - weight = edge_attributes['weight'] - else: - weight = 1 - edges = edges.append({'source': edge[0], 'target': edge[1], 'weight': weight}, ignore_index=True) + A = sp.sparse.coo_matrix((weights, (edges['source'], edges['target'])), shape=(nedges, nedges)) + return A.tolil() - edges['source'].astype(np.int32) - edges['target'].astype(np.int32) - return nodes, edges +def _merge_points_with_nodes(nodes, points): + nodes['x'] = points[:, 0] + nodes['y'] = points[:, 1] + return nodes class forceatlas2_layout(param.ParameterizedFunction): @@ -55,18 +51,18 @@ class forceatlas2_layout(param.ParameterizedFunction): http://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0098679&type=printable """ - def __call__(self, graph, iterations=10, linlog=False, pos=None, nohubs=False, k=None, dim=2): + def __call__(self, nodes, edges, iterations=10, linlog=False, nohubs=False, k=None, dim=2): """ Parameters ---------- - graph : networkx.Graph - The NetworkX graph to layout + nodes : pandas.DataFrame + The nodes of a graph + edges : pandas.DataFrame + The edges of a graph iterations : int Number of iterations linlog : bool Whether to use logarithmic attraction force - pos : ndarray - Initial positions for the given nodes nohubs : bool Whether to grant authorities (nodes with a high indegree) a more central position than hubs (nodes with a high outdegree) @@ -79,25 +75,13 @@ def __call__(self, graph, iterations=10, linlog=False, pos=None, nohubs=False, k Returns ------- - nodes, edges : pandas.DataFrame + nodes : pandas.DataFrame """ - try: - import networkx as nx - except ImportError: - raise ImportError('install networkx to use this feature') - - # This comes from the sparse FR layout in NetworkX - A = nx.to_scipy_sparse_matrix(graph, dtype='f') - nnodes, _ = A.shape - - try: - A = A.tolil() - except Exception: - A = (sp.sparse.coo_matrix(A)).tolil() - if pos is None: - pos = np.asarray(np.random.random((nnodes, dim)), dtype=A.dtype) - else: - pos = pos.astype(A.dtype) + + nnodes = len(nodes) + points = _extract_points_from_nodes(nodes) + A = _convert_edges_to_sparse_matrix(edges) + if k is None: k = np.sqrt(1.0 / nnodes) @@ -113,7 +97,7 @@ def __call__(self, graph, iterations=10, linlog=False, pos=None, nohubs=False, k displacement *= 0 for i in range(A.shape[0]): # difference between this row's node position and all others - delta = (pos[i] - pos).T + delta = (points[i] - points).T # distance between points distance = np.sqrt((delta ** 2).sum(axis=0)) @@ -133,13 +117,13 @@ def __call__(self, graph, iterations=10, linlog=False, pos=None, nohubs=False, k dist = np.log(dist + 1) displacement[:, i] += (delta * (dist - ai * distance / k)).sum(axis=1) - # update positions + # update points length = np.sqrt((displacement ** 2).sum(axis=0)) length = np.where(length < 0.01, 0.01, length) - pos += (displacement * t / length).T + points += (displacement * t / length).T # cool temperature t -= dt - # Return the layout - return _convert_graph_with_positions_to_dataframes(graph, pos) + # Return the nodes with updated positions + return _merge_points_with_nodes(nodes, points) From f7d4b27c2e494b0b46db0d7a200e9aa98a137082 Mon Sep 17 00:00:00 2001 From: Joseph Crail Date: Sun, 9 Jul 2017 00:00:53 -0400 Subject: [PATCH 4/7] Parameterize the layout algorithm --- datashader/layout.py | 47 ++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/datashader/layout.py b/datashader/layout.py index 55c32b445..84a9af258 100644 --- a/datashader/layout.py +++ b/datashader/layout.py @@ -51,33 +51,28 @@ class forceatlas2_layout(param.ParameterizedFunction): http://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0098679&type=printable """ - def __call__(self, nodes, edges, iterations=10, linlog=False, nohubs=False, k=None, dim=2): - """ - Parameters - ---------- - nodes : pandas.DataFrame - The nodes of a graph - edges : pandas.DataFrame - The edges of a graph - iterations : int - Number of iterations - linlog : bool - Whether to use logarithmic attraction force - nohubs : bool - Whether to grant authorities (nodes with a high indegree) a - more central position than hubs (nodes with a high outdegree) - k : float - Compensates for the repulsion for nodes that are far away - from the center. Defaults to the inverse of the number of - nodes. - dim : int - Coordinate dimensions of each node. - - Returns - ------- - nodes : pandas.DataFrame - """ + iterations = param.Integer(default=10, bounds=(1, None), doc=""" + Number of passes for the layout algorithm""") + linlog = param.Boolean(False, doc=""" + Whether to use logarithmic attraction force""") + + nohubs = param.Boolean(False, doc=""" + Whether to grant authorities (nodes with a high indegree) a + more central position than hubs (nodes with a high outdegree)""") + + k = param.Number(default=None, doc=""" + Compensates for the repulsion for nodes that are far away + from the center. Defaults to the inverse of the number of + nodes.""") + + dim = param.Integer(default=2, bounds=(1, None), doc=""" + Coordinate dimensions of each node""") + + def __call__(self, nodes, edges, **params): + p = param.ParamOverrides(self, params) + + # Convert graph into sparse adjacency matrix and array of points nnodes = len(nodes) points = _extract_points_from_nodes(nodes) A = _convert_edges_to_sparse_matrix(edges) From 869f488aabe01eaf8abfcf4fcd6c832cafe024df Mon Sep 17 00:00:00 2001 From: Joseph Crail Date: Sun, 9 Jul 2017 00:13:09 -0400 Subject: [PATCH 5/7] Fix parameter references --- datashader/layout.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/datashader/layout.py b/datashader/layout.py index 84a9af258..dccf27f75 100644 --- a/datashader/layout.py +++ b/datashader/layout.py @@ -77,8 +77,8 @@ def __call__(self, nodes, edges, **params): points = _extract_points_from_nodes(nodes) A = _convert_edges_to_sparse_matrix(edges) - if k is None: - k = np.sqrt(1.0 / nnodes) + if p.k is None: + p.k = np.sqrt(1.0 / nnodes) # the initial "temperature" is about .1 of domain area (=1x1) # this is the largest step allowed in the dynamics. @@ -86,9 +86,9 @@ def __call__(self, nodes, edges, **params): # simple cooling scheme. # linearly step down by dt on each iteration so last iteration is size dt. - dt = t / float(iterations + 1) - displacement = np.zeros((dim, nnodes)) - for iteration in range(iterations): + dt = t / float(p.iterations + 1) + displacement = np.zeros((p.dim, nnodes)) + for iteration in range(p.iterations): displacement *= 0 for i in range(A.shape[0]): # difference between this row's node position and all others @@ -104,13 +104,13 @@ def __call__(self, nodes, edges, **params): ai = np.asarray(A.getrowview(i).toarray()) # displacement "force" - dist = k * k / distance ** 2 + dist = p.k * p.k / distance ** 2 - if nohubs: + if p.nohubs: dist = dist / float(ai.sum(axis=1) + 1) - if linlog: + if p.linlog: dist = np.log(dist + 1) - displacement[:, i] += (delta * (dist - ai * distance / k)).sum(axis=1) + displacement[:, i] += (delta * (dist - ai * distance / p.k)).sum(axis=1) # update points length = np.sqrt((displacement ** 2).sum(axis=0)) From 2fa1deb11b2fa716ecca44f43a7d382cb5947c82 Mon Sep 17 00:00:00 2001 From: Joseph Crail Date: Sun, 9 Jul 2017 22:44:09 -0400 Subject: [PATCH 6/7] Add tests --- datashader/layout.py | 15 ++++---- datashader/tests/test_layout.py | 65 +++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 8 deletions(-) create mode 100644 datashader/tests/test_layout.py diff --git a/datashader/layout.py b/datashader/layout.py index dccf27f75..e138feafb 100644 --- a/datashader/layout.py +++ b/datashader/layout.py @@ -24,21 +24,20 @@ def _extract_points_from_nodes(nodes): def _convert_edges_to_sparse_matrix(edges): - nedges = len(edges) - if 'weight' in edges: - weights = edges['weights'] + weights = edges['weight'] else: - weights = np.ones(nedges) + weights = np.ones(len(edges)) - A = sp.sparse.coo_matrix((weights, (edges['source'], edges['target'])), shape=(nedges, nedges)) + A = sp.sparse.coo_matrix((weights, (edges['source'], edges['target']))) return A.tolil() def _merge_points_with_nodes(nodes, points): - nodes['x'] = points[:, 0] - nodes['y'] = points[:, 1] - return nodes + n = nodes.copy() + n['x'] = points[:, 0] + n['y'] = points[:, 1] + return n class forceatlas2_layout(param.ParameterizedFunction): diff --git a/datashader/tests/test_layout.py b/datashader/tests/test_layout.py new file mode 100644 index 000000000..ca0739fa4 --- /dev/null +++ b/datashader/tests/test_layout.py @@ -0,0 +1,65 @@ +import pytest + +import numpy as np +import pandas as pd + +from datashader.layout import forceatlas2_layout + + +@pytest.fixture +def nodes(): + # Four nodes arranged at the corners of a 200x200 square with one node + # at the center + nodes_df = pd.DataFrame({'id': np.arange(5), + 'x': [0., -100., 100., -100., 100.], + 'y': [0., 100., 100., -100., -100.]}) + return nodes_df.set_index('id') + + +@pytest.fixture +def nodes_without_positions(): + nodes_df = pd.DataFrame({'id': np.arange(5)}) + return nodes_df.set_index('id') + + +@pytest.fixture +def edges(): + # Four edges originating from the center node and connected to each + # corner + edges_df = pd.DataFrame({'id': np.arange(4), + 'source': np.zeros(4, dtype=np.int64), + 'target': np.arange(1, 5)}) + return edges_df.set_index('id') + + +@pytest.fixture +def weighted_edges(): + # Four weighted edges originating from the center node and connected + # to each corner + edges_df = pd.DataFrame({'id': np.arange(4), + 'source': np.zeros(4, dtype=np.int64), + 'target': np.arange(1, 5), + 'weight': np.ones(4)}) + return edges_df.set_index('id') + + +def test_forceatlas2_positioned_nodes_with_unweighted_edges(nodes, edges): + df = forceatlas2_layout(nodes, edges) + assert df.equals(nodes) + + +def test_forceatlas2_positioned_nodes_with_weighted_edges(nodes, weighted_edges): + df = forceatlas2_layout(nodes, weighted_edges) + assert df.equals(nodes) + + +def test_forceatlas2_unpositioned_nodes_with_unweighted_edges(nodes_without_positions, edges): + df = forceatlas2_layout(nodes_without_positions, edges) + assert len(nodes_without_positions) == len(df) + assert not df.equals(nodes_without_positions) + + +def test_forceatlas2_unpositioned_nodes_with_weighted_edges(nodes_without_positions, weighted_edges): + df = forceatlas2_layout(nodes_without_positions, weighted_edges) + assert len(nodes_without_positions) == len(df) + assert not df.equals(nodes_without_positions) From e9171075d11609137787d524c349994f87bef3a6 Mon Sep 17 00:00:00 2001 From: Joseph Crail Date: Mon, 10 Jul 2017 09:55:05 -0400 Subject: [PATCH 7/7] Numba-ize cooling scheme --- datashader/layout.py | 80 +++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/datashader/layout.py b/datashader/layout.py index e138feafb..f15c71f01 100644 --- a/datashader/layout.py +++ b/datashader/layout.py @@ -9,8 +9,8 @@ from __future__ import absolute_import, division, print_function +import numba as nb import numpy as np -import pandas as pd import param import scipy as sp @@ -40,6 +40,43 @@ def _merge_points_with_nodes(nodes, points): return n +@nb.jit(nogil=True) +def cooling(matrix, points, temperature, params): + dt = temperature / float(params.iterations + 1) + displacement = np.zeros((params.dim, len(points))) + for iteration in range(params.iterations): + displacement *= 0 + for i in range(matrix.shape[0]): + # difference between this row's node position and all others + delta = (points[i] - points).T + + # distance between points + distance = np.sqrt((delta ** 2).sum(axis=0)) + + # enforce minimum distance of 0.01 + distance = np.where(distance < 0.01, 0.01, distance) + + # the adjacency matrix row + ai = np.asarray(matrix.getrowview(i).toarray()) + + # displacement "force" + dist = params.k * params.k / distance ** 2 + + if params.nohubs: + dist = dist / float(ai.sum(axis=1) + 1) + if params.linlog: + dist = np.log(dist + 1) + displacement[:, i] += (delta * (dist - ai * distance / params.k)).sum(axis=1) + + # update points + length = np.sqrt((displacement ** 2).sum(axis=0)) + length = np.where(length < 0.01, 0.01, length) + points += (displacement * temperature / length).T + + # cool temperature + temperature -= dt + + class forceatlas2_layout(param.ParameterizedFunction): """ Assign coordinates to the nodes of a graph. @@ -72,52 +109,19 @@ def __call__(self, nodes, edges, **params): p = param.ParamOverrides(self, params) # Convert graph into sparse adjacency matrix and array of points - nnodes = len(nodes) points = _extract_points_from_nodes(nodes) - A = _convert_edges_to_sparse_matrix(edges) + matrix = _convert_edges_to_sparse_matrix(edges) if p.k is None: - p.k = np.sqrt(1.0 / nnodes) + p.k = np.sqrt(1.0 / len(points)) # the initial "temperature" is about .1 of domain area (=1x1) # this is the largest step allowed in the dynamics. - t = 0.1 + temperature = 0.1 # simple cooling scheme. # linearly step down by dt on each iteration so last iteration is size dt. - dt = t / float(p.iterations + 1) - displacement = np.zeros((p.dim, nnodes)) - for iteration in range(p.iterations): - displacement *= 0 - for i in range(A.shape[0]): - # difference between this row's node position and all others - delta = (points[i] - points).T - - # distance between points - distance = np.sqrt((delta ** 2).sum(axis=0)) - - # enforce minimum distance of 0.01 - distance = np.where(distance < 0.01, 0.01, distance) - - # the adjacency matrix row - ai = np.asarray(A.getrowview(i).toarray()) - - # displacement "force" - dist = p.k * p.k / distance ** 2 - - if p.nohubs: - dist = dist / float(ai.sum(axis=1) + 1) - if p.linlog: - dist = np.log(dist + 1) - displacement[:, i] += (delta * (dist - ai * distance / p.k)).sum(axis=1) - - # update points - length = np.sqrt((displacement ** 2).sum(axis=0)) - length = np.where(length < 0.01, 0.01, length) - points += (displacement * t / length).T - - # cool temperature - t -= dt + cooling(matrix, points, temperature, p) # Return the nodes with updated positions return _merge_points_with_nodes(nodes, points)