Skip to content

Commit

Permalink
Merge branch 'branch-0.18' of github.com:rapidsai/cugraph into fea_in…
Browse files Browse the repository at this point in the history
…duced_subgraph
  • Loading branch information
seunghwak committed Jan 22, 2021
2 parents 42a886c + 20d2a5b commit 92b0e5d
Show file tree
Hide file tree
Showing 18 changed files with 320 additions and 154 deletions.
16 changes: 15 additions & 1 deletion .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners
# Order matters - match of highest importance goes last (last match wins)

#doc code owners
datasets/ @rapidsai/cugraph-doc-codeowners
notebooks/ @rapidsai/cugraph-doc-codeowners
docs/ @rapidsai/cugraph-doc-codeowners
**/*.txt @rapidsai/cugraph-doc-codeowners
**/*.md @rapidsai/cugraph-doc-codeowners
**/*.rst @rapidsai/cugraph-doc-codeowners
**/*.ipynb @rapidsai/cugraph-doc-codeowners
**/*.pdf @rapidsai/cugraph-doc-codeowners
**/*.png @rapidsai/cugraph-doc-codeowners

#cpp code owners
cpp/ @rapidsai/cugraph-cpp-codeowners

Expand All @@ -9,7 +23,7 @@ python/ @rapidsai/cugraph-python-codeowners
**/cmake/ @rapidsai/cugraph-cmake-codeowners

#build/ops code owners
.github/ @rapidsai/ops-codeowners
.github/ @rapidsai/ops-codeowners
ci/ @rapidsai/ops-codeowners
conda/ @rapidsai/ops-codeowners
**/Dockerfile @rapidsai/ops-codeowners
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
- PR #1279 Add self loop check variable in graph
- PR #1277 SciPy sparse matrix input support for WCC, SCC, SSSP, and BFS
- PR #1278 Add support for shortest_path_length and fix graph vertex checks
- PR #1280 Add Multi(Di)Graph support

## Improvements
- PR #1227 Pin cmake policies to cmake 3.17 version
Expand Down
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ else(DEFINED ENV{RAFT_PATH})

ExternalProject_Add(raft
GIT_REPOSITORY https://github.com/rapidsai/raft.git
GIT_TAG f75d7b437bf1da3df749108161b8a0505fb6b7b3
GIT_TAG 9dbf2c8a9134ce8135f7fe947ec523d874fcab6a
PREFIX ${RAFT_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
Expand Down
2 changes: 2 additions & 0 deletions python/cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
from cugraph.structure import (
Graph,
DiGraph,
MultiGraph,
MultiDiGraph,
from_edgelist,
from_cudf_edgelist,
from_pandas_edgelist,
Expand Down
10 changes: 5 additions & 5 deletions python/cugraph/comms/comms.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2020, NVIDIA CORPORATION.
# Copyright (c) 2018-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand All @@ -12,7 +12,7 @@
# limitations under the License.

from cugraph.raft.dask.common.comms import Comms as raftComms
from cugraph.raft.dask.common.comms import worker_state
from cugraph.raft.dask.common.comms import get_raft_comm_state
from cugraph.raft.common.handle import Handle
from cugraph.comms.comms_wrapper import init_subcomms as c_init_subcomms
from dask.distributed import default_client
Expand Down Expand Up @@ -196,12 +196,12 @@ def get_default_handle():
# Functions to be called from within workers

def get_handle(sID):
sessionstate = worker_state(sID)
sessionstate = get_raft_comm_state(sID)
return sessionstate['handle']


def get_worker_id(sID):
sessionstate = worker_state(sID)
sessionstate = get_raft_comm_state(sID)
return sessionstate['wid']


Expand All @@ -216,5 +216,5 @@ def get_n_workers(sID=None):
if sID is None:
return read_utils.get_n_workers()
else:
sessionstate = worker_state(sID)
sessionstate = get_raft_comm_state(sID)
return sessionstate['nworkers']
2 changes: 1 addition & 1 deletion python/cugraph/structure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from cugraph.structure.graph import Graph, DiGraph
from cugraph.structure.graph import Graph, DiGraph, MultiGraph, MultiDiGraph
from cugraph.structure.number_map import NumberMap
from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf
from cugraph.structure.convert_matrix import (from_edgelist,
Expand Down
73 changes: 40 additions & 33 deletions python/cugraph/structure/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def __init__(self, offsets, indices, value=None):
def __init__(
self,
m_graph=None,
edge_attr=None,
symmetrized=False,
bipartite=False,
multi=False,
Expand Down Expand Up @@ -112,24 +111,22 @@ def __init__(
self.batch_transposed_adjlists = None

if m_graph is not None:
if (type(self) is Graph and type(m_graph) is MultiGraph) or (
type(self) is DiGraph and type(m_graph) is MultiDiGraph
):
self.from_cudf_edgelist(
m_graph.edgelist.edgelist_df,
source="src",
destination="dst",
edge_attr=edge_attr,
)
self.renumbered = m_graph.renumbered
self.renumber_map = m_graph.renumber_map
if type(m_graph) is MultiGraph or type(m_graph) is MultiDiGraph:
elist = m_graph.view_edge_list()
if m_graph.edgelist.weights:
weights = "weights"
else:
weights = None
self.from_cudf_edgelist(elist,
source="src",
destination="dst",
edge_attr=weights)
else:
msg = (
"Graph can be initialized using MultiGraph "
"and DiGraph can be initialized using MultiDiGraph"
"Graph can only be initialized using MultiGraph "
"or MultiDiGraph"
)
raise Exception(msg)
# self.number_of_vertices = None

def enable_batch(self):
client = mg_utils.get_client()
Expand Down Expand Up @@ -277,6 +274,12 @@ def is_multipartite(self):
# TO DO: Call coloring algorithm
return self.multipartite or self.bipartite

def is_multigraph(self):
"""
Returns True if the graph is a multigraph. Else returns False.
"""
return self.multi

def sets(self):
"""
Returns the bipartite set of nodes. This solely relies on the user's
Expand Down Expand Up @@ -408,24 +411,19 @@ def from_cudf_edgelist(
source_col = elist[source]
dest_col = elist[destination]

if self.multi:
if type(edge_attr) is not list:
raise Exception("edge_attr should be a list of column names")
value_col = {}
for col_name in edge_attr:
value_col[col_name] = elist[col_name]
elif edge_attr is not None:
if edge_attr is not None:
value_col = elist[edge_attr]
else:
value_col = None

if not self.symmetrized and not self.multi:
if value_col is not None:
source_col, dest_col, value_col = symmetrize(
source_col, dest_col, value_col
)
else:
source_col, dest_col = symmetrize(source_col, dest_col)
if value_col is not None:
source_col, dest_col, value_col = symmetrize(
source_col, dest_col, value_col, multi=self.multi,
symmetrize=not self.symmetrized)
else:
source_col, dest_col = symmetrize(
source_col, dest_col, multi=self.multi,
symmetrize=not self.symmetrized)

self.edgelist = Graph.EdgeList(source_col, dest_col, value_col)

Expand Down Expand Up @@ -700,7 +698,7 @@ def view_edge_list(self):
edgelist_df = self.unrenumber(edgelist_df, "src")
edgelist_df = self.unrenumber(edgelist_df, "dst")

if type(self) is Graph:
if type(self) is Graph or type(self) is MultiGraph:
edgelist_df = edgelist_df[edgelist_df["src"] <= edgelist_df["dst"]]
edgelist_df = edgelist_df.reset_index(drop=True)
self.edge_count = len(edgelist_df)
Expand Down Expand Up @@ -992,7 +990,7 @@ def number_of_edges(self, directed_edges=False):
return len(self.edgelist.edgelist_df)
if self.edge_count is None:
if self.edgelist is not None:
if type(self) is Graph:
if type(self) is Graph or type(self) is MultiGraph:
self.edge_count = len(
self.edgelist.edgelist_df[
self.edgelist.edgelist_df["src"]
Expand Down Expand Up @@ -1486,17 +1484,26 @@ def add_internal_vertex_id(


class DiGraph(Graph):
def __init__(self, m_graph=None, edge_attr=None):
"""
cuGraph directed graph class. Drops parallel edges.
"""
def __init__(self, m_graph=None):
super().__init__(
m_graph=m_graph, edge_attr=edge_attr, symmetrized=True
m_graph=m_graph, symmetrized=True
)


class MultiGraph(Graph):
"""
cuGraph class to create and store undirected graphs with parallel edges.
"""
def __init__(self, renumbered=True):
super().__init__(multi=True)


class MultiDiGraph(Graph):
"""
cuGraph class to create and store directed graphs with parallel edges.
"""
def __init__(self, renumbered=True):
super().__init__(symmetrized=True, multi=True)
55 changes: 37 additions & 18 deletions python/cugraph/structure/symmetrize.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import dask_cudf


def symmetrize_df(df, src_name, dst_name):
def symmetrize_df(df, src_name, dst_name, multi=False, symmetrize=True):
"""
Take a COO stored in a DataFrame, along with the column names of
the source and destination columns and create a new data frame
Expand All @@ -42,6 +42,13 @@ def symmetrize_df(df, src_name, dst_name):
Name of the column in the data frame containing the source ids
dst_name : string
Name of the column in the data frame containing the destination ids
multi : bool
Set to True if graph is a Multi(Di)Graph. This allows multiple
edges instead of dropping them.
symmetrize : bool
Default is True to perform symmetrization. If False only duplicate
edges are dropped.
Examples
--------
>>> import cugraph.dask as dcg
Expand All @@ -54,26 +61,30 @@ def symmetrize_df(df, src_name, dst_name):
>>> sym_ddf = cugraph.symmetrize_ddf(ddf, "src", "dst", "weight")
>>> Comms.destroy()
"""
gdf = cudf.DataFrame()

#
# Now append the columns. We add sources to the end of destinations,
# and destinations to the end of sources. Otherwise we append a
# column onto itself.
#
for idx, name in enumerate(df.columns):
if name == src_name:
gdf[src_name] = df[src_name].append(
df[dst_name], ignore_index=True
)
elif name == dst_name:
gdf[dst_name] = df[dst_name].append(
df[src_name], ignore_index=True
)
else:
gdf[name] = df[name].append(df[name], ignore_index=True)

return gdf.groupby(by=[src_name, dst_name], as_index=False).min()
if symmetrize:
gdf = cudf.DataFrame()
for idx, name in enumerate(df.columns):
if name == src_name:
gdf[src_name] = df[src_name].append(
df[dst_name], ignore_index=True
)
elif name == dst_name:
gdf[dst_name] = df[dst_name].append(
df[src_name], ignore_index=True
)
else:
gdf[name] = df[name].append(df[name], ignore_index=True)
else:
gdf = df
if multi:
return gdf
else:
return gdf.groupby(by=[src_name, dst_name], as_index=False).min()


def symmetrize_ddf(df, src_name, dst_name, weight_name=None):
Expand Down Expand Up @@ -105,6 +116,12 @@ def symmetrize_ddf(df, src_name, dst_name, weight_name=None):
Name of the column in the data frame containing the source ids
dst_name : string
Name of the column in the data frame containing the destination ids
multi : bool
Set to True if graph is a Multi(Di)Graph. This allows multiple
edges instead of dropping them.
symmetrize : bool
Default is True to perform symmetrization. If False only duplicate
edges are dropped.
Examples
--------
Expand All @@ -129,7 +146,8 @@ def symmetrize_ddf(df, src_name, dst_name, weight_name=None):
return result


def symmetrize(source_col, dest_col, value_col=None):
def symmetrize(source_col, dest_col, value_col=None, multi=False,
symmetrize=True):
"""
Take a COO set of source destination pairs along with associated values
stored in a single GPU or distributed
Expand Down Expand Up @@ -190,7 +208,8 @@ def symmetrize(source_col, dest_col, value_col=None):
input_df, "source", "destination", weight_name
).persist()
else:
output_df = symmetrize_df(input_df, "source", "destination")
output_df = symmetrize_df(input_df, "source", "destination", multi,
symmetrize)

if value_col is not None:
return (
Expand Down
Loading

0 comments on commit 92b0e5d

Please sign in to comment.